`. \n\nFor example, you have a variable called \"var1\":\n```json\n{\n \"command\": \"docker\",\n \"args\": [\n \"run\",\n \"-i\",\n \"--rm\",\n \"-e\", \"API_TOKEN\"\n ],\n \"env\": {\n \"API_TOKEN\": \"{{$vars.var1}}\"\n }\n}\n```\n\nFor example, when using SSE, you can use the variable \"var1\" in the headers:\n```json\n{\n \"url\": \"https://api.example.com/endpoint/sse\",\n \"headers\": {\n \"Authorization\": \"Bearer {{$vars.var1}}\"\n }\n}\n```\n"
+ },
+ "placeholder": "{\n \"command\": \"npx\",\n \"args\": [\"-y\", \"@modelcontextprotocol/server-filesystem\", \"/path/to/allowed/files\"]\n}",
+ "id": "customMCP_0-input-mcpServerConfig-code",
+ "display": true
+ },
+ {
+ "label": "Available Actions",
+ "name": "mcpActions",
+ "type": "asyncMultiOptions",
+ "loadMethod": "listActions",
+ "refresh": true,
+ "id": "customMCP_0-input-mcpActions-asyncMultiOptions",
+ "display": true
+ }
+ ],
+ "inputAnchors": [],
+ "inputs": {
+ "mcpServerConfig": "{\n \"url\": \"http://172.17.0.1:5057/sse\",\n \"transport\": \"sse\",\n \"headers\": {\n \"x-flowise-session-id\": \"{{$flow.sessionId}}\",\n \"x-flowise-chat-id\": \"{{$flow.chatId}}\"\n }\n}",
+ "mcpActions": "[\"calculate\",\"cancel_pending_order\",\"exchange_delivered_order_items\",\"find_user_id_by_email\",\"find_user_id_by_name_zip\",\"get_order_details\",\"get_product_details\",\"get_user_details\",\"list_all_product_types\",\"modify_pending_order_address\",\"modify_pending_order_items\",\"modify_pending_order_payment\",\"modify_user_address\",\"return_delivered_order_items\",\"transfer_to_human_agents\"]"
+ },
+ "outputAnchors": [
+ {
+ "id": "customMCP_0-output-customMCP-Tool",
+ "name": "customMCP",
+ "label": "Custom MCP Tool",
+ "description": "Custom MCP Config",
+ "type": "Tool"
+ }
+ ],
+ "outputs": {},
+ "selected": false
+ },
+ "width": 300,
+ "height": 969,
+ "selected": false,
+ "positionAbsolute": {
+ "x": 884.247485310339,
+ "y": -404.5115261233922
+ },
+ "dragging": false
+ }
+ ],
+ "edges": [
+ {
+ "source": "bufferMemory_0",
+ "sourceHandle": "bufferMemory_0-output-bufferMemory-BufferMemory|BaseChatMemory|BaseMemory",
+ "target": "toolAgent_0",
+ "targetHandle": "toolAgent_0-input-memory-BaseChatMemory",
+ "type": "buttonedge",
+ "id": "bufferMemory_0-bufferMemory_0-output-bufferMemory-BufferMemory|BaseChatMemory|BaseMemory-toolAgent_0-toolAgent_0-input-memory-BaseChatMemory"
+ },
+ {
+ "source": "chatOpenAICustom_0",
+ "sourceHandle": "chatOpenAICustom_0-output-chatOpenAICustom-ChatOpenAI-Custom|BaseChatOpenAI|BaseChatModel|BaseLanguageModel|Runnable",
+ "target": "toolAgent_0",
+ "targetHandle": "toolAgent_0-input-model-BaseChatModel",
+ "type": "buttonedge",
+ "id": "chatOpenAICustom_0-chatOpenAICustom_0-output-chatOpenAICustom-ChatOpenAI-Custom|BaseChatOpenAI|BaseChatModel|BaseLanguageModel|Runnable-toolAgent_0-toolAgent_0-input-model-BaseChatModel",
+ "selected": false
+ },
+ {
+ "source": "customMCP_0",
+ "sourceHandle": "customMCP_0-output-customMCP-Tool",
+ "target": "toolAgent_0",
+ "targetHandle": "toolAgent_0-input-tools-Tool",
+ "type": "buttonedge",
+ "id": "customMCP_0-customMCP_0-output-customMCP-Tool-toolAgent_0-toolAgent_0-input-tools-Tool"
+ }
+ ]
+}
\ No newline at end of file
diff --git a/sample_solutions/AgenticCodeExecution/Flowise/agentflow_code_execution_retail.json b/sample_solutions/AgenticCodeExecution/Flowise/agentflow_code_execution_retail.json
new file mode 100644
index 00000000..f4b245c7
--- /dev/null
+++ b/sample_solutions/AgenticCodeExecution/Flowise/agentflow_code_execution_retail.json
@@ -0,0 +1,868 @@
+{
+ "nodes": [
+ {
+ "id": "startAgentflow_0",
+ "type": "agentFlow",
+ "position": {
+ "x": -218.5,
+ "y": 116
+ },
+ "data": {
+ "id": "startAgentflow_0",
+ "label": "Start",
+ "version": 1.1,
+ "name": "startAgentflow",
+ "type": "Start",
+ "color": "#7EE787",
+ "hideInput": true,
+ "baseClasses": [
+ "Start"
+ ],
+ "category": "Agent Flows",
+ "description": "Starting point of the agentflow",
+ "inputParams": [
+ {
+ "label": "Input Type",
+ "name": "startInputType",
+ "type": "options",
+ "options": [
+ {
+ "label": "Chat Input",
+ "name": "chatInput",
+ "description": "Start the conversation with chat input"
+ },
+ {
+ "label": "Form Input",
+ "name": "formInput",
+ "description": "Start the workflow with form inputs"
+ }
+ ],
+ "default": "chatInput",
+ "id": "startAgentflow_0-input-startInputType-options",
+ "display": true
+ },
+ {
+ "label": "Form Title",
+ "name": "formTitle",
+ "type": "string",
+ "placeholder": "Please Fill Out The Form",
+ "show": {
+ "startInputType": "formInput"
+ },
+ "id": "startAgentflow_0-input-formTitle-string",
+ "display": false
+ },
+ {
+ "label": "Form Description",
+ "name": "formDescription",
+ "type": "string",
+ "placeholder": "Complete all fields below to continue",
+ "show": {
+ "startInputType": "formInput"
+ },
+ "id": "startAgentflow_0-input-formDescription-string",
+ "display": false
+ },
+ {
+ "label": "Form Input Types",
+ "name": "formInputTypes",
+ "description": "Specify the type of form input",
+ "type": "array",
+ "show": {
+ "startInputType": "formInput"
+ },
+ "array": [
+ {
+ "label": "Type",
+ "name": "type",
+ "type": "options",
+ "options": [
+ {
+ "label": "String",
+ "name": "string"
+ },
+ {
+ "label": "Number",
+ "name": "number"
+ },
+ {
+ "label": "Boolean",
+ "name": "boolean"
+ },
+ {
+ "label": "Options",
+ "name": "options"
+ }
+ ],
+ "default": "string"
+ },
+ {
+ "label": "Label",
+ "name": "label",
+ "type": "string",
+ "placeholder": "Label for the input"
+ },
+ {
+ "label": "Variable Name",
+ "name": "name",
+ "type": "string",
+ "placeholder": "Variable name for the input (must be camel case)",
+ "description": "Variable name must be camel case. For example: firstName, lastName, etc."
+ },
+ {
+ "label": "Add Options",
+ "name": "addOptions",
+ "type": "array",
+ "show": {
+ "formInputTypes[$index].type": "options"
+ },
+ "array": [
+ {
+ "label": "Option",
+ "name": "option",
+ "type": "string"
+ }
+ ]
+ }
+ ],
+ "id": "startAgentflow_0-input-formInputTypes-array",
+ "display": false
+ },
+ {
+ "label": "Ephemeral Memory",
+ "name": "startEphemeralMemory",
+ "type": "boolean",
+ "description": "Start fresh for every execution without past chat history",
+ "optional": true,
+ "id": "startAgentflow_0-input-startEphemeralMemory-boolean",
+ "display": true
+ },
+ {
+ "label": "Flow State",
+ "name": "startState",
+ "description": "Runtime state during the execution of the workflow",
+ "type": "array",
+ "optional": true,
+ "array": [
+ {
+ "label": "Key",
+ "name": "key",
+ "type": "string",
+ "placeholder": "Foo"
+ },
+ {
+ "label": "Value",
+ "name": "value",
+ "type": "string",
+ "placeholder": "Bar",
+ "optional": true
+ }
+ ],
+ "id": "startAgentflow_0-input-startState-array",
+ "display": true
+ },
+ {
+ "label": "Persist State",
+ "name": "startPersistState",
+ "type": "boolean",
+ "description": "Persist the state in the same session",
+ "optional": true,
+ "id": "startAgentflow_0-input-startPersistState-boolean",
+ "display": true
+ }
+ ],
+ "inputAnchors": [],
+ "inputs": {
+ "startInputType": "chatInput",
+ "formTitle": "",
+ "formDescription": "",
+ "formInputTypes": "",
+ "startEphemeralMemory": "",
+ "startState": "",
+ "startPersistState": ""
+ },
+ "outputAnchors": [
+ {
+ "id": "startAgentflow_0-output-startAgentflow",
+ "label": "Start",
+ "name": "startAgentflow"
+ }
+ ],
+ "outputs": {},
+ "selected": false
+ },
+ "width": 103,
+ "height": 66,
+ "positionAbsolute": {
+ "x": -218.5,
+ "y": 116
+ },
+ "selected": false,
+ "dragging": false
+ },
+ {
+ "id": "agentAgentflow_0",
+ "position": {
+ "x": 143,
+ "y": 99.25
+ },
+ "data": {
+ "id": "agentAgentflow_0",
+ "label": "Agent 0",
+ "version": 3.2,
+ "name": "agentAgentflow",
+ "type": "Agent",
+ "color": "#4DD0E1",
+ "baseClasses": [
+ "Agent"
+ ],
+ "category": "Agent Flows",
+ "description": "Dynamically choose and utilize tools during runtime, enabling multi-step reasoning",
+ "inputParams": [
+ {
+ "label": "Model",
+ "name": "agentModel",
+ "type": "asyncOptions",
+ "loadMethod": "listModels",
+ "loadConfig": true,
+ "id": "agentAgentflow_0-input-agentModel-asyncOptions",
+ "display": true
+ },
+ {
+ "label": "Messages",
+ "name": "agentMessages",
+ "type": "array",
+ "optional": true,
+ "acceptVariable": true,
+ "array": [
+ {
+ "label": "Role",
+ "name": "role",
+ "type": "options",
+ "options": [
+ {
+ "label": "System",
+ "name": "system"
+ },
+ {
+ "label": "Assistant",
+ "name": "assistant"
+ },
+ {
+ "label": "Developer",
+ "name": "developer"
+ },
+ {
+ "label": "User",
+ "name": "user"
+ }
+ ]
+ },
+ {
+ "label": "Content",
+ "name": "content",
+ "type": "string",
+ "acceptVariable": true,
+ "generateInstruction": true,
+ "rows": 4
+ }
+ ],
+ "id": "agentAgentflow_0-input-agentMessages-array",
+ "display": true
+ },
+ {
+ "label": "OpenAI Built-in Tools",
+ "name": "agentToolsBuiltInOpenAI",
+ "type": "multiOptions",
+ "optional": true,
+ "options": [
+ {
+ "label": "Web Search",
+ "name": "web_search_preview",
+ "description": "Search the web for the latest information"
+ },
+ {
+ "label": "Code Interpreter",
+ "name": "code_interpreter",
+ "description": "Write and run Python code in a sandboxed environment"
+ },
+ {
+ "label": "Image Generation",
+ "name": "image_generation",
+ "description": "Generate images based on a text prompt"
+ }
+ ],
+ "show": {
+ "agentModel": "chatOpenAI"
+ },
+ "id": "agentAgentflow_0-input-agentToolsBuiltInOpenAI-multiOptions",
+ "display": true
+ },
+ {
+ "label": "Gemini Built-in Tools",
+ "name": "agentToolsBuiltInGemini",
+ "type": "multiOptions",
+ "optional": true,
+ "options": [
+ {
+ "label": "URL Context",
+ "name": "urlContext",
+ "description": "Extract content from given URLs"
+ },
+ {
+ "label": "Google Search",
+ "name": "googleSearch",
+ "description": "Search real-time web content"
+ },
+ {
+ "label": "Code Execution",
+ "name": "codeExecution",
+ "description": "Write and run Python code in a sandboxed environment"
+ }
+ ],
+ "show": {
+ "agentModel": "chatGoogleGenerativeAI"
+ },
+ "id": "agentAgentflow_0-input-agentToolsBuiltInGemini-multiOptions",
+ "display": false
+ },
+ {
+ "label": "Anthropic Built-in Tools",
+ "name": "agentToolsBuiltInAnthropic",
+ "type": "multiOptions",
+ "optional": true,
+ "options": [
+ {
+ "label": "Web Search",
+ "name": "web_search_20250305",
+ "description": "Search the web for the latest information"
+ },
+ {
+ "label": "Web Fetch",
+ "name": "web_fetch_20250910",
+ "description": "Retrieve full content from specified web pages"
+ }
+ ],
+ "show": {
+ "agentModel": "chatAnthropic"
+ },
+ "id": "agentAgentflow_0-input-agentToolsBuiltInAnthropic-multiOptions",
+ "display": false
+ },
+ {
+ "label": "Tools",
+ "name": "agentTools",
+ "type": "array",
+ "optional": true,
+ "array": [
+ {
+ "label": "Tool",
+ "name": "agentSelectedTool",
+ "type": "asyncOptions",
+ "loadMethod": "listTools",
+ "loadConfig": true
+ },
+ {
+ "label": "Require Human Input",
+ "name": "agentSelectedToolRequiresHumanInput",
+ "type": "boolean",
+ "optional": true
+ }
+ ],
+ "id": "agentAgentflow_0-input-agentTools-array",
+ "display": true
+ },
+ {
+ "label": "Knowledge (Document Stores)",
+ "name": "agentKnowledgeDocumentStores",
+ "type": "array",
+ "description": "Give your agent context about different document sources. Document stores must be upserted in advance.",
+ "array": [
+ {
+ "label": "Document Store",
+ "name": "documentStore",
+ "type": "asyncOptions",
+ "loadMethod": "listStores"
+ },
+ {
+ "label": "Describe Knowledge",
+ "name": "docStoreDescription",
+ "type": "string",
+ "generateDocStoreDescription": true,
+ "placeholder": "Describe what the knowledge base is about, this is useful for the AI to know when and how to search for correct information",
+ "rows": 4
+ },
+ {
+ "label": "Return Source Documents",
+ "name": "returnSourceDocuments",
+ "type": "boolean",
+ "optional": true
+ }
+ ],
+ "optional": true,
+ "id": "agentAgentflow_0-input-agentKnowledgeDocumentStores-array",
+ "display": true
+ },
+ {
+ "label": "Knowledge (Vector Embeddings)",
+ "name": "agentKnowledgeVSEmbeddings",
+ "type": "array",
+ "description": "Give your agent context about different document sources from existing vector stores and embeddings",
+ "array": [
+ {
+ "label": "Vector Store",
+ "name": "vectorStore",
+ "type": "asyncOptions",
+ "loadMethod": "listVectorStores",
+ "loadConfig": true
+ },
+ {
+ "label": "Embedding Model",
+ "name": "embeddingModel",
+ "type": "asyncOptions",
+ "loadMethod": "listEmbeddings",
+ "loadConfig": true
+ },
+ {
+ "label": "Knowledge Name",
+ "name": "knowledgeName",
+ "type": "string",
+ "placeholder": "A short name for the knowledge base, this is useful for the AI to know when and how to search for correct information"
+ },
+ {
+ "label": "Describe Knowledge",
+ "name": "knowledgeDescription",
+ "type": "string",
+ "placeholder": "Describe what the knowledge base is about, this is useful for the AI to know when and how to search for correct information",
+ "rows": 4
+ },
+ {
+ "label": "Return Source Documents",
+ "name": "returnSourceDocuments",
+ "type": "boolean",
+ "optional": true
+ }
+ ],
+ "optional": true,
+ "id": "agentAgentflow_0-input-agentKnowledgeVSEmbeddings-array",
+ "display": true
+ },
+ {
+ "label": "Enable Memory",
+ "name": "agentEnableMemory",
+ "type": "boolean",
+ "description": "Enable memory for the conversation thread",
+ "default": true,
+ "optional": true,
+ "id": "agentAgentflow_0-input-agentEnableMemory-boolean",
+ "display": true
+ },
+ {
+ "label": "Memory Type",
+ "name": "agentMemoryType",
+ "type": "options",
+ "options": [
+ {
+ "label": "All Messages",
+ "name": "allMessages",
+ "description": "Retrieve all messages from the conversation"
+ },
+ {
+ "label": "Window Size",
+ "name": "windowSize",
+ "description": "Uses a fixed window size to surface the last N messages"
+ },
+ {
+ "label": "Conversation Summary",
+ "name": "conversationSummary",
+ "description": "Summarizes the whole conversation"
+ },
+ {
+ "label": "Conversation Summary Buffer",
+ "name": "conversationSummaryBuffer",
+ "description": "Summarize conversations once token limit is reached. Default to 2000"
+ }
+ ],
+ "optional": true,
+ "default": "allMessages",
+ "show": {
+ "agentEnableMemory": true
+ },
+ "id": "agentAgentflow_0-input-agentMemoryType-options",
+ "display": true
+ },
+ {
+ "label": "Window Size",
+ "name": "agentMemoryWindowSize",
+ "type": "number",
+ "default": "20",
+ "description": "Uses a fixed window size to surface the last N messages",
+ "show": {
+ "agentMemoryType": "windowSize"
+ },
+ "id": "agentAgentflow_0-input-agentMemoryWindowSize-number",
+ "display": false
+ },
+ {
+ "label": "Max Token Limit",
+ "name": "agentMemoryMaxTokenLimit",
+ "type": "number",
+ "default": "2000",
+ "description": "Summarize conversations once token limit is reached. Default to 2000",
+ "show": {
+ "agentMemoryType": "conversationSummaryBuffer"
+ },
+ "id": "agentAgentflow_0-input-agentMemoryMaxTokenLimit-number",
+ "display": false
+ },
+ {
+ "label": "Input Message",
+ "name": "agentUserMessage",
+ "type": "string",
+ "description": "Add an input message as user message at the end of the conversation",
+ "rows": 4,
+ "optional": true,
+ "acceptVariable": true,
+ "show": {
+ "agentEnableMemory": true
+ },
+ "id": "agentAgentflow_0-input-agentUserMessage-string",
+ "display": true
+ },
+ {
+ "label": "Return Response As",
+ "name": "agentReturnResponseAs",
+ "type": "options",
+ "options": [
+ {
+ "label": "User Message",
+ "name": "userMessage"
+ },
+ {
+ "label": "Assistant Message",
+ "name": "assistantMessage"
+ }
+ ],
+ "default": "userMessage",
+ "id": "agentAgentflow_0-input-agentReturnResponseAs-options",
+ "display": true
+ },
+ {
+ "label": "JSON Structured Output",
+ "name": "agentStructuredOutput",
+ "description": "Instruct the Agent to give output in a JSON structured schema",
+ "type": "array",
+ "optional": true,
+ "acceptVariable": true,
+ "array": [
+ {
+ "label": "Key",
+ "name": "key",
+ "type": "string"
+ },
+ {
+ "label": "Type",
+ "name": "type",
+ "type": "options",
+ "options": [
+ {
+ "label": "String",
+ "name": "string"
+ },
+ {
+ "label": "String Array",
+ "name": "stringArray"
+ },
+ {
+ "label": "Number",
+ "name": "number"
+ },
+ {
+ "label": "Boolean",
+ "name": "boolean"
+ },
+ {
+ "label": "Enum",
+ "name": "enum"
+ },
+ {
+ "label": "JSON Array",
+ "name": "jsonArray"
+ }
+ ]
+ },
+ {
+ "label": "Enum Values",
+ "name": "enumValues",
+ "type": "string",
+ "placeholder": "value1, value2, value3",
+ "description": "Enum values. Separated by comma",
+ "optional": true,
+ "show": {
+ "agentStructuredOutput[$index].type": "enum"
+ }
+ },
+ {
+ "label": "JSON Schema",
+ "name": "jsonSchema",
+ "type": "code",
+ "placeholder": "{\n \"answer\": {\n \"type\": \"string\",\n \"description\": \"Value of the answer\"\n },\n \"reason\": {\n \"type\": \"string\",\n \"description\": \"Reason for the answer\"\n },\n \"optional\": {\n \"type\": \"boolean\"\n },\n \"count\": {\n \"type\": \"number\"\n },\n \"children\": {\n \"type\": \"array\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"value\": {\n \"type\": \"string\",\n \"description\": \"Value of the children's answer\"\n }\n }\n }\n }\n}",
+ "description": "JSON schema for the structured output",
+ "optional": true,
+ "hideCodeExecute": true,
+ "show": {
+ "agentStructuredOutput[$index].type": "jsonArray"
+ }
+ },
+ {
+ "label": "Description",
+ "name": "description",
+ "type": "string",
+ "placeholder": "Description of the key"
+ }
+ ],
+ "id": "agentAgentflow_0-input-agentStructuredOutput-array",
+ "display": true
+ },
+ {
+ "label": "Update Flow State",
+ "name": "agentUpdateState",
+ "description": "Update runtime state during the execution of the workflow",
+ "type": "array",
+ "optional": true,
+ "acceptVariable": true,
+ "array": [
+ {
+ "label": "Key",
+ "name": "key",
+ "type": "asyncOptions",
+ "loadMethod": "listRuntimeStateKeys"
+ },
+ {
+ "label": "Value",
+ "name": "value",
+ "type": "string",
+ "acceptVariable": true,
+ "acceptNodeOutputAsVariable": true
+ }
+ ],
+ "id": "agentAgentflow_0-input-agentUpdateState-array",
+ "display": true
+ }
+ ],
+ "inputAnchors": [],
+ "inputs": {
+ "agentModel": "chatOpenAICustom",
+ "agentMessages": [
+ {
+ "role": "system",
+ "content": "<instructions>
You are a customer service agent that helps the user according to the <policy> provided below.
============================================================
SECTION 1 — GENERIC EXECUTE_PYTHON INSTRUCTIONS
============================================================
HOW YOU WORK:
You have ONE tool: execute_python. It runs Python code in a sandbox.
Inside that sandbox, an actions object is pre-loaded with methods you call to perform operations (look up records, search options, modify state, etc.).
The full list of available actions and their parameters is documented in the execute_python tool description — READ IT before writing code.
TURN STRUCTURE (STRICT):
In each turn you must do EXACTLY ONE of the following — never both:
A) Send a text message to the user, OR
B) Make an execute_python call.
You MUST NOT combine a message and a tool call in the same turn.
In particular, if execute_python returned an error, your next turn must be ONLY an execute_python call with fixed code — no text, no apology, no explanation.
CRITICAL — WRITE COMPLETE SCRIPTS:
A single execute_python call can contain MANY actions.* calls chained together in one script.
You MUST combine all related steps into ONE execute_python call.
Do NOT make separate execute_python calls for each individual action.
Think about what information you need, then write ONE script that gathers and processes ALL of it.
Only make a second execute_python call if the first one fails or if you need user input before continuing.
SANDBOX ENVIRONMENT:
Your code runs in a restricted Python sandbox. These constraints apply:
- ALLOWED IMPORTS: json, math, datetime, time, re, collections, itertools — nothing else
- NO file I/O: open(), os, sys, subprocess, pathlib are all blocked
- NO exec(), eval(), compile()
- NO dunder attributes: __name__, __class__, __dict__ etc. are blocked
- NO input(): extract information from the conversation instead
- The actions object is ALREADY AVAILABLE — do not import it
CRITICAL INPUT BAN (HIGHEST PRIORITY):
- NEVER use input() in execute_python code.
- NEVER use placeholder variables such as input, Input, or user_input.
- NEVER write code like email = input(...) or value = Input.
- If any value is missing, ask the user in a normal assistant message (not in Python code).
- In execute_python code, only use values already present in conversation/tool outputs.
- Any script using input() is invalid and must be rewritten before execution.
CODE GENERATION RULES:
1. Treat actions.* outputs by TYPE:
- If output is structured data (object/list), parse with json.loads() before field access.
- If output is a scalar (e.g., identifier/status string), use it directly.
Safe pattern:
import json
raw = actions.ANY_METHOD(args)
# parse when raw looks like structured JSON; otherwise use raw directly
data = json.loads(raw) if isinstance(raw, str) and raw.lstrip().startswith('[') else raw
# for JSON objects, use the same idea: parse when output is structured JSON text
❌ WRONG — accessing fields on the raw JSON string:
result = actions.some_write_method(...)
print(result['status']) # CRASH: result is a STRING, not a dict
✅ CORRECT — parse first, then access:
result = json.loads(actions.some_write_method(...))
print(result['status'])
2. ALWAYS print results — print() is the ONLY way to see output:
print(data)
3. DICT vs LIST — read the signature line for each action:
Many actions return DICTS (not lists). Iterating a dict gives you KEYS (strings), not objects.
❌ WRONG — treats dict as list of objects:
for item in data:
print(item['name']) # CRASH: item is a string key, not a dict
✅ CORRECT — use .items() for dicts:
for key, value in data.items():
print(key, value)
When unsure, print the data first: print(type(data), data)
4. STATELESS: Variables do NOT persist between execute_python calls.
Put ALL steps in ONE script.
5. NEVER fabricate identifiers or option values.
Extract concrete values from tool outputs and reuse them exactly.
Never pass placeholder tokens like \"user_id\", \"order_id\", \"item_id\", \"payment_method_id\"
as actual values. Those are parameter NAMES, not real values.
6. PRE-FLIGHT CHECKLIST before any state-changing action:
Before calling any write action, verify all required arguments come from current
data in THIS script and satisfy preconditions.
Generic checks:
- Every argument variable is defined before use (no undefined names).
- No input()/Input/user_input usage anywhere in the script.
- Entities referenced by the action are confirmed to exist in retrieved data.
- Current state allows the action (e.g., status/eligibility/business constraints).
- Argument values are concrete runtime values (not placeholder strings).
Pattern:
# 1) Read current state
state = json.loads(actions.some_read_method(...))
print(state)
# 2) Validate preconditions and required values
# (extract concrete values from state; do not invent placeholders)
# 3) Execute write action only after checks
result = json.loads(actions.some_write_method(...))
print(result)
7. LOOKUP + EXECUTION GUARDRAILS (GENERIC):
A) LOOKUP FALLBACK (max 2 tries):
- Try one primary lookup strategy.
- If it fails, try one alternative strategy.
- If both fail, DO NOT guess values and DO NOT run write actions.
Ask the user for the missing value(s) in a normal assistant message.
B) NEVER USE PLACEHOLDER LITERALS AS REAL VALUES:
Parameter names are not runtime values.
Forbidden as actual values: \"user_id\", \"order_id\", \"item_id\", \"product_id\",
\"payment_method_id\", \"email\", \"first_name\", \"last_name\", \"zip\", etc.
C) READ-THEN-WRITE DISCIPLINE:
Before any write action, explicitly read current state and confirm:
- target entity exists
- operation is allowed in current state
- selected replacement/option/value is valid in current data
D) ARGUMENT SANITY CHECKS BEFORE WRITE:
For each required argument:
- it is defined in this script
- it is non-empty and concrete (not placeholder text)
- it was derived from conversation/tool output
Print chosen argument values before the write call.
E) TWO-PHASE SCRIPT TEMPLATE:
- Phase 1: gather and print all required facts
- Phase 2: execute write action only if checks pass
8. MANDATORY PRE-EXECUTION SELF-CHECK (RUN THIS MENTALLY BEFORE EVERY execute_python CALL):
- No input(), input, Input, or user_input anywhere in code.
- No placeholder literals used as real values (e.g., \"user_id\", \"order_id\", etc.).
- Every variable used in arguments is defined in THIS script.
- For every critical value, print it before write actions.
- If a lookup failed twice, DO NOT continue with write actions; ask user for corrected value.
9. STRICT LOOKUP FAILURE RULE:
If authentication/lookup fails (target entity not found), you MUST stop mutation flow.
Do not call write actions after unresolved lookup failure.
Your next action is either:
- one alternative lookup attempt, OR
- ask the user for corrected identifying information.
SILENT ERROR RECOVERY (MANDATORY):
When execute_python returns an error, you MUST:
1. NOT produce any text message — no apology, no explanation, no \"let me try again\"
2. Immediately make another execute_python call with corrected code
3. The user must NEVER know an error occurred
This means your very next turn after an error is ONLY a tool call — zero words to the user.
❌ WRONG: \"I apologize for the error. Let me try a different approach.\" + execute_python
❌ WRONG: \"Let me fix that.\" + execute_python
❌ WRONG: Any text at all before or alongside the retry
✅ CORRECT: execute_python (with fixed code, no accompanying text)
Only speak to the user when you have a successful result or need information from them.
If after 3 retries you still cannot complete the task, you may say there is a temporary technical issue — but never mention implementation details.
ANTI-LOOP GUARD (GENERIC):
- If you hit the same error class twice in a row (e.g., repeated \"not found\" or repeated type/index error),
stop retrying variations and switch strategy:
1) one alternative lookup/validation path, then
2) ask user for the missing/corrected value if still unresolved.
- Do not burn steps by repeating near-identical failing scripts.
============================================================
SECTION 2 — USE-CASE SPECIFIC EXAMPLES (RETAIL)
============================================================
CRITICAL — WRITE COMPLETE SCRIPTS EXAMPLE:
❌ WRONG (multiple execute_python calls, one action each):
Turn 1: execute_python → actions.find_user_id_by_email(\"user@example.com\")
Turn 2: execute_python → actions.get_user_details(\"user_123\")
Turn 3: execute_python → actions.get_order_details(\"#W456\")
✅ CORRECT (one execute_python call with a complete script):
execute_python →
import json
user_id = actions.find_user_id_by_email(\"user@example.com\")
print(f\"User ID: {{ user_id }}\")
user = json.loads(actions.get_user_details(user_id))
print(f\"Name: {{ user['name'] }}\")
order = json.loads(actions.get_order_details(\"#W456\"))
print(f\"Order status: {{ order['status'] }}\")
IDENTIFIER REUSE EXAMPLE:
user = json.loads(actions.get_user_details(user_id))
for pm_id, pm_info in user['payment_methods'].items():
print(f\"{{ pm_id }}: {{ pm_info }}\") # pm_id IS the payment method ID — use it exactly
TRANSFER TO HUMAN AGENT:
To transfer, make an execute_python call with code: actions.transfer_to_human_agents(\"summary of the issue\"), then send the message 'YOU ARE BEING TRANSFERRED TO A HUMAN AGENT. PLEASE HOLD ON.' to the user.
</instructions>
<policy>
# Retail agent policy
As a retail agent, you can help users:
- cancel or modify pending orders
- return or exchange delivered orders
- modify their default user address
- provide information about their own profile, orders, and related products
At the beginning of the conversation, you have to authenticate the user identity by locating their user id via email, or via name + zip code. This has to be done even when the user already provides the user id.
Once the user has been authenticated, you can provide the user with information about order, product, profile information, e.g. help the user look up order id.
You can only help one user per conversation (but you can handle multiple requests from the same user), and must deny any requests for tasks related to any other user.
Before taking any action that updates the database (cancel, modify, return, exchange), you must list the action details and obtain explicit user confirmation (yes) to proceed.
You should not make up any information or knowledge or procedures not provided by the user or the tools, or give subjective recommendations or comments.
You should at most make one tool call at a time, and if you take a tool call, you should not respond to the user at the same time. If you respond to the user, you should not make a tool call at the same time.
You should deny user requests that are against this policy.
You should transfer the user to a human agent if and only if the request cannot be handled within the scope of your actions. To transfer, first make a tool call to transfer_to_human_agents, and then send the message 'YOU ARE BEING TRANSFERRED TO A HUMAN AGENT. PLEASE HOLD ON.' to the user.
## Domain basic
- All times in the database are EST and 24 hour based. For example \"02:30:00\" means 2:30 AM EST.
### User
Each user has a profile containing:
- unique user id
- email
- default address
- payment methods.
There are three types of payment methods: gift card, paypal account, credit card.
### Product
Our retail store has 50 types of products.
For each type of product, there are variant items of different options.
For example, for a 't-shirt' product, there could be a variant item with option 'color blue size M', and another variant item with option 'color red size L'.
Each product has the following attributes:
- unique product id
- name
- list of variants
Each variant item has the following attributes:
- unique item id
- information about the value of the product options for this item.
- availability
- price
Note: Product ID and Item ID have no relations and should not be confused!
### Order
Each order has the following attributes:
- unique order id
- user id
- address
- items ordered
- status
- fulfillments info (tracking id and item ids)
- payment history
The status of an order can be: pending, processed, delivered, or cancelled.
Orders can have other optional attributes based on the actions that have been taken (cancellation reason, which items have been exchanged, what was the exchange price difference, etc.)
## Generic action rules
Generally, you can only take action on pending or delivered orders.
Exchange or modify order tools can only be called once per order. Be sure that all items to be changed are collected into a list before making the tool call!!!
## Cancel pending order
An order can only be cancelled if its status is 'pending', and you should check its status before taking the action.
The user needs to confirm the order id and the reason (either 'no longer needed' or 'ordered by mistake') for cancellation. Other reasons are not acceptable.
After user confirmation, the order status will be changed to 'cancelled', and the total will be refunded via the original payment method immediately if it is gift card, otherwise in 5 to 7 business days.
## Modify pending order
An order can only be modified if its status is 'pending', and you should check its status before taking the action.
For a pending order, you can take actions to modify its shipping address, payment method, or product item options, but nothing else.
### Modify payment
The user can only choose a single payment method different from the original payment method.
If the user wants to modify the payment method to gift card, it must have enough balance to cover the total amount.
After user confirmation, the order status will be kept as 'pending'. The original payment method will be refunded immediately if it is a gift card, otherwise it will be refunded within 5 to 7 business days.
### Modify items
This action can only be called once, and will change the order status to 'pending (items modifed)'. The agent will not be able to modify or cancel the order anymore. So you must confirm all the details are correct and be cautious before taking this action. In particular, remember to remind the customer to confirm they have provided all the items they want to modify.
For a pending order, each item can be modified to an available new item of the same product but of different product option. There cannot be any change of product types, e.g. modify shirt to shoe.
The user must provide a payment method to pay or receive refund of the price difference. If the user provides a gift card, it must have enough balance to cover the price difference.
## Return delivered order
An order can only be returned if its status is 'delivered', and you should check its status before taking the action.
The user needs to confirm the order id and the list of items to be returned.
The user needs to provide a payment method to receive the refund.
The refund must either go to the original payment method, or an existing gift card.
After user confirmation, the order status will be changed to 'return requested', and the user will receive an email regarding how to return items.
## Exchange delivered order
An order can only be exchanged if its status is 'delivered', and you should check its status before taking the action. In particular, remember to remind the customer to confirm they have provided all items to be exchanged.
For a delivered order, each item can be exchanged to an available new item of the same product but of different product option. There cannot be any change of product types, e.g. modify shirt to shoe.
The user must provide a payment method to pay or receive refund of the price difference. If the user provides a gift card, it must have enough balance to cover the price difference.
After user confirmation, the order status will be changed to 'exchange requested', and the user will receive an email regarding how to return items. There is no need to place a new order.
</policy>
"
+ }
+ ],
+ "agentToolsBuiltInOpenAI": "",
+ "agentTools": [
+ {
+ "agentSelectedTool": "customMCP",
+ "agentSelectedToolRequiresHumanInput": false,
+ "agentSelectedToolConfig": {
+ "mcpServerConfig": "{\n \"url\": \"http://172.17.0.1:5051/sse\",\n \"transport\": \"sse\"\n}",
+ "mcpActions": "[\"execute_python\",\"list_available_actions\"]",
+ "agentSelectedTool": "customMCP"
+ }
+ }
+ ],
+ "agentKnowledgeDocumentStores": "",
+ "agentKnowledgeVSEmbeddings": "",
+ "agentEnableMemory": true,
+ "agentMemoryType": "allMessages",
+ "agentUserMessage": "",
+ "agentReturnResponseAs": "userMessage",
+ "agentStructuredOutput": "",
+ "agentUpdateState": "",
+ "agentModelConfig": {
+ "cache": "",
+ "modelName": "Qwen/Qwen3-Coder-30B-A3B-Instruct",
+ "temperature": "0.0",
+ "streaming": true,
+ "maxTokens": "",
+ "topP": "",
+ "frequencyPenalty": "",
+ "presencePenalty": "",
+ "timeout": "",
+ "basepath": "http://172.17.0.1:8002/v1",
+ "baseOptions": "",
+ "agentModel": "chatOpenAICustom"
+ },
+ "undefined": ""
+ },
+ "outputAnchors": [
+ {
+ "id": "agentAgentflow_0-output-agentAgentflow",
+ "label": "Agent",
+ "name": "agentAgentflow"
+ }
+ ],
+ "outputs": {},
+ "selected": false
+ },
+ "type": "agentFlow",
+ "width": 320,
+ "height": 100,
+ "selected": false,
+ "dragging": false,
+ "positionAbsolute": {
+ "x": 143,
+ "y": 99.25
+ }
+ },
+ {
+ "id": "toolAgentflow_0",
+ "position": {
+ "x": -42.5,
+ "y": 116
+ },
+ "data": {
+ "id": "toolAgentflow_0",
+ "label": "Tool 0",
+ "version": 1.2,
+ "name": "toolAgentflow",
+ "type": "Tool",
+ "color": "#d4a373",
+ "baseClasses": [
+ "Tool"
+ ],
+ "category": "Agent Flows",
+ "description": "Tools allow LLM to interact with external systems",
+ "inputParams": [
+ {
+ "label": "Tool",
+ "name": "toolAgentflowSelectedTool",
+ "type": "asyncOptions",
+ "loadMethod": "listTools",
+ "loadConfig": true,
+ "id": "toolAgentflow_0-input-toolAgentflowSelectedTool-asyncOptions",
+ "display": true
+ },
+ {
+ "label": "Tool Input Arguments",
+ "name": "toolInputArgs",
+ "type": "array",
+ "acceptVariable": true,
+ "refresh": true,
+ "array": [
+ {
+ "label": "Input Argument Name",
+ "name": "inputArgName",
+ "type": "asyncOptions",
+ "loadMethod": "listToolInputArgs",
+ "refresh": true
+ },
+ {
+ "label": "Input Argument Value",
+ "name": "inputArgValue",
+ "type": "string",
+ "acceptVariable": true
+ }
+ ],
+ "show": {
+ "toolAgentflowSelectedTool": ".+"
+ },
+ "id": "toolAgentflow_0-input-toolInputArgs-array",
+ "display": true
+ },
+ {
+ "label": "Update Flow State",
+ "name": "toolUpdateState",
+ "description": "Update runtime state during the execution of the workflow",
+ "type": "array",
+ "optional": true,
+ "acceptVariable": true,
+ "array": [
+ {
+ "label": "Key",
+ "name": "key",
+ "type": "asyncOptions",
+ "loadMethod": "listRuntimeStateKeys"
+ },
+ {
+ "label": "Value",
+ "name": "value",
+ "type": "string",
+ "acceptVariable": true,
+ "acceptNodeOutputAsVariable": true
+ }
+ ],
+ "id": "toolAgentflow_0-input-toolUpdateState-array",
+ "display": true
+ }
+ ],
+ "inputAnchors": [],
+ "inputs": {
+ "toolAgentflowSelectedTool": "customMCP",
+ "toolInputArgs": [
+ {
+ "inputArgName": "sessionId",
+ "inputArgValue": "{{ $flow.sessionId }}
"
+ },
+ {
+ "inputArgName": "chatId",
+ "inputArgValue": "{{ $flow.chatId }}
"
+ }
+ ],
+ "toolUpdateState": "",
+ "toolAgentflowSelectedToolConfig": {
+ "mcpServerConfig": "{\n \"url\": \"http://172.17.0.1:5051/sse\",\n \"transport\": \"sse\"\n}",
+ "mcpActions": "[\"get_session_id\"]",
+ "toolAgentflowSelectedTool": "customMCP"
+ }
+ },
+ "outputAnchors": [
+ {
+ "id": "toolAgentflow_0-output-toolAgentflow",
+ "label": "Tool",
+ "name": "toolAgentflow"
+ }
+ ],
+ "outputs": {},
+ "selected": false
+ },
+ "type": "agentFlow",
+ "width": 112,
+ "height": 68,
+ "selected": false,
+ "positionAbsolute": {
+ "x": -42.5,
+ "y": 116
+ },
+ "dragging": false
+ }
+ ],
+ "edges": [
+ {
+ "source": "startAgentflow_0",
+ "sourceHandle": "startAgentflow_0-output-startAgentflow",
+ "target": "toolAgentflow_0",
+ "targetHandle": "toolAgentflow_0",
+ "data": {
+ "sourceColor": "#7EE787",
+ "targetColor": "#d4a373",
+ "isHumanInput": false
+ },
+ "type": "agentFlow",
+ "id": "startAgentflow_0-startAgentflow_0-output-startAgentflow-toolAgentflow_0-toolAgentflow_0"
+ },
+ {
+ "source": "toolAgentflow_0",
+ "sourceHandle": "toolAgentflow_0-output-toolAgentflow",
+ "target": "agentAgentflow_0",
+ "targetHandle": "agentAgentflow_0",
+ "data": {
+ "sourceColor": "#d4a373",
+ "targetColor": "#4DD0E1",
+ "isHumanInput": false
+ },
+ "type": "agentFlow",
+ "id": "toolAgentflow_0-toolAgentflow_0-output-toolAgentflow-agentAgentflow_0-agentAgentflow_0"
+ }
+ ]
+}
\ No newline at end of file
diff --git a/sample_solutions/AgenticCodeExecution/Flowise/agentflow_code_execution_stocks.json b/sample_solutions/AgenticCodeExecution/Flowise/agentflow_code_execution_stocks.json
new file mode 100644
index 00000000..a2954213
--- /dev/null
+++ b/sample_solutions/AgenticCodeExecution/Flowise/agentflow_code_execution_stocks.json
@@ -0,0 +1,868 @@
+{
+ "nodes": [
+ {
+ "id": "startAgentflow_0",
+ "type": "agentFlow",
+ "position": {
+ "x": -218.5,
+ "y": 116
+ },
+ "data": {
+ "id": "startAgentflow_0",
+ "label": "Start",
+ "version": 1.1,
+ "name": "startAgentflow",
+ "type": "Start",
+ "color": "#7EE787",
+ "hideInput": true,
+ "baseClasses": [
+ "Start"
+ ],
+ "category": "Agent Flows",
+ "description": "Starting point of the agentflow",
+ "inputParams": [
+ {
+ "label": "Input Type",
+ "name": "startInputType",
+ "type": "options",
+ "options": [
+ {
+ "label": "Chat Input",
+ "name": "chatInput",
+ "description": "Start the conversation with chat input"
+ },
+ {
+ "label": "Form Input",
+ "name": "formInput",
+ "description": "Start the workflow with form inputs"
+ }
+ ],
+ "default": "chatInput",
+ "id": "startAgentflow_0-input-startInputType-options",
+ "display": true
+ },
+ {
+ "label": "Form Title",
+ "name": "formTitle",
+ "type": "string",
+ "placeholder": "Please Fill Out The Form",
+ "show": {
+ "startInputType": "formInput"
+ },
+ "id": "startAgentflow_0-input-formTitle-string",
+ "display": false
+ },
+ {
+ "label": "Form Description",
+ "name": "formDescription",
+ "type": "string",
+ "placeholder": "Complete all fields below to continue",
+ "show": {
+ "startInputType": "formInput"
+ },
+ "id": "startAgentflow_0-input-formDescription-string",
+ "display": false
+ },
+ {
+ "label": "Form Input Types",
+ "name": "formInputTypes",
+ "description": "Specify the type of form input",
+ "type": "array",
+ "show": {
+ "startInputType": "formInput"
+ },
+ "array": [
+ {
+ "label": "Type",
+ "name": "type",
+ "type": "options",
+ "options": [
+ {
+ "label": "String",
+ "name": "string"
+ },
+ {
+ "label": "Number",
+ "name": "number"
+ },
+ {
+ "label": "Boolean",
+ "name": "boolean"
+ },
+ {
+ "label": "Options",
+ "name": "options"
+ }
+ ],
+ "default": "string"
+ },
+ {
+ "label": "Label",
+ "name": "label",
+ "type": "string",
+ "placeholder": "Label for the input"
+ },
+ {
+ "label": "Variable Name",
+ "name": "name",
+ "type": "string",
+ "placeholder": "Variable name for the input (must be camel case)",
+ "description": "Variable name must be camel case. For example: firstName, lastName, etc."
+ },
+ {
+ "label": "Add Options",
+ "name": "addOptions",
+ "type": "array",
+ "show": {
+ "formInputTypes[$index].type": "options"
+ },
+ "array": [
+ {
+ "label": "Option",
+ "name": "option",
+ "type": "string"
+ }
+ ]
+ }
+ ],
+ "id": "startAgentflow_0-input-formInputTypes-array",
+ "display": false
+ },
+ {
+ "label": "Ephemeral Memory",
+ "name": "startEphemeralMemory",
+ "type": "boolean",
+ "description": "Start fresh for every execution without past chat history",
+ "optional": true,
+ "id": "startAgentflow_0-input-startEphemeralMemory-boolean",
+ "display": true
+ },
+ {
+ "label": "Flow State",
+ "name": "startState",
+ "description": "Runtime state during the execution of the workflow",
+ "type": "array",
+ "optional": true,
+ "array": [
+ {
+ "label": "Key",
+ "name": "key",
+ "type": "string",
+ "placeholder": "Foo"
+ },
+ {
+ "label": "Value",
+ "name": "value",
+ "type": "string",
+ "placeholder": "Bar",
+ "optional": true
+ }
+ ],
+ "id": "startAgentflow_0-input-startState-array",
+ "display": true
+ },
+ {
+ "label": "Persist State",
+ "name": "startPersistState",
+ "type": "boolean",
+ "description": "Persist the state in the same session",
+ "optional": true,
+ "id": "startAgentflow_0-input-startPersistState-boolean",
+ "display": true
+ }
+ ],
+ "inputAnchors": [],
+ "inputs": {
+ "startInputType": "chatInput",
+ "formTitle": "",
+ "formDescription": "",
+ "formInputTypes": "",
+ "startEphemeralMemory": "",
+ "startState": "",
+ "startPersistState": ""
+ },
+ "outputAnchors": [
+ {
+ "id": "startAgentflow_0-output-startAgentflow",
+ "label": "Start",
+ "name": "startAgentflow"
+ }
+ ],
+ "outputs": {},
+ "selected": false
+ },
+ "width": 103,
+ "height": 66,
+ "positionAbsolute": {
+ "x": -218.5,
+ "y": 116
+ },
+ "selected": false,
+ "dragging": false
+ },
+ {
+ "id": "agentAgentflow_0",
+ "position": {
+ "x": 143,
+ "y": 99.25
+ },
+ "data": {
+ "id": "agentAgentflow_0",
+ "label": "Agent 0",
+ "version": 3.2,
+ "name": "agentAgentflow",
+ "type": "Agent",
+ "color": "#4DD0E1",
+ "baseClasses": [
+ "Agent"
+ ],
+ "category": "Agent Flows",
+ "description": "Dynamically choose and utilize tools during runtime, enabling multi-step reasoning",
+ "inputParams": [
+ {
+ "label": "Model",
+ "name": "agentModel",
+ "type": "asyncOptions",
+ "loadMethod": "listModels",
+ "loadConfig": true,
+ "id": "agentAgentflow_0-input-agentModel-asyncOptions",
+ "display": true
+ },
+ {
+ "label": "Messages",
+ "name": "agentMessages",
+ "type": "array",
+ "optional": true,
+ "acceptVariable": true,
+ "array": [
+ {
+ "label": "Role",
+ "name": "role",
+ "type": "options",
+ "options": [
+ {
+ "label": "System",
+ "name": "system"
+ },
+ {
+ "label": "Assistant",
+ "name": "assistant"
+ },
+ {
+ "label": "Developer",
+ "name": "developer"
+ },
+ {
+ "label": "User",
+ "name": "user"
+ }
+ ]
+ },
+ {
+ "label": "Content",
+ "name": "content",
+ "type": "string",
+ "acceptVariable": true,
+ "generateInstruction": true,
+ "rows": 4
+ }
+ ],
+ "id": "agentAgentflow_0-input-agentMessages-array",
+ "display": true
+ },
+ {
+ "label": "OpenAI Built-in Tools",
+ "name": "agentToolsBuiltInOpenAI",
+ "type": "multiOptions",
+ "optional": true,
+ "options": [
+ {
+ "label": "Web Search",
+ "name": "web_search_preview",
+ "description": "Search the web for the latest information"
+ },
+ {
+ "label": "Code Interpreter",
+ "name": "code_interpreter",
+ "description": "Write and run Python code in a sandboxed environment"
+ },
+ {
+ "label": "Image Generation",
+ "name": "image_generation",
+ "description": "Generate images based on a text prompt"
+ }
+ ],
+ "show": {
+ "agentModel": "chatOpenAI"
+ },
+ "id": "agentAgentflow_0-input-agentToolsBuiltInOpenAI-multiOptions",
+ "display": true
+ },
+ {
+ "label": "Gemini Built-in Tools",
+ "name": "agentToolsBuiltInGemini",
+ "type": "multiOptions",
+ "optional": true,
+ "options": [
+ {
+ "label": "URL Context",
+ "name": "urlContext",
+ "description": "Extract content from given URLs"
+ },
+ {
+ "label": "Google Search",
+ "name": "googleSearch",
+ "description": "Search real-time web content"
+ },
+ {
+ "label": "Code Execution",
+ "name": "codeExecution",
+ "description": "Write and run Python code in a sandboxed environment"
+ }
+ ],
+ "show": {
+ "agentModel": "chatGoogleGenerativeAI"
+ },
+ "id": "agentAgentflow_0-input-agentToolsBuiltInGemini-multiOptions",
+ "display": false
+ },
+ {
+ "label": "Anthropic Built-in Tools",
+ "name": "agentToolsBuiltInAnthropic",
+ "type": "multiOptions",
+ "optional": true,
+ "options": [
+ {
+ "label": "Web Search",
+ "name": "web_search_20250305",
+ "description": "Search the web for the latest information"
+ },
+ {
+ "label": "Web Fetch",
+ "name": "web_fetch_20250910",
+ "description": "Retrieve full content from specified web pages"
+ }
+ ],
+ "show": {
+ "agentModel": "chatAnthropic"
+ },
+ "id": "agentAgentflow_0-input-agentToolsBuiltInAnthropic-multiOptions",
+ "display": false
+ },
+ {
+ "label": "Tools",
+ "name": "agentTools",
+ "type": "array",
+ "optional": true,
+ "array": [
+ {
+ "label": "Tool",
+ "name": "agentSelectedTool",
+ "type": "asyncOptions",
+ "loadMethod": "listTools",
+ "loadConfig": true
+ },
+ {
+ "label": "Require Human Input",
+ "name": "agentSelectedToolRequiresHumanInput",
+ "type": "boolean",
+ "optional": true
+ }
+ ],
+ "id": "agentAgentflow_0-input-agentTools-array",
+ "display": true
+ },
+ {
+ "label": "Knowledge (Document Stores)",
+ "name": "agentKnowledgeDocumentStores",
+ "type": "array",
+ "description": "Give your agent context about different document sources. Document stores must be upserted in advance.",
+ "array": [
+ {
+ "label": "Document Store",
+ "name": "documentStore",
+ "type": "asyncOptions",
+ "loadMethod": "listStores"
+ },
+ {
+ "label": "Describe Knowledge",
+ "name": "docStoreDescription",
+ "type": "string",
+ "generateDocStoreDescription": true,
+ "placeholder": "Describe what the knowledge base is about, this is useful for the AI to know when and how to search for correct information",
+ "rows": 4
+ },
+ {
+ "label": "Return Source Documents",
+ "name": "returnSourceDocuments",
+ "type": "boolean",
+ "optional": true
+ }
+ ],
+ "optional": true,
+ "id": "agentAgentflow_0-input-agentKnowledgeDocumentStores-array",
+ "display": true
+ },
+ {
+ "label": "Knowledge (Vector Embeddings)",
+ "name": "agentKnowledgeVSEmbeddings",
+ "type": "array",
+ "description": "Give your agent context about different document sources from existing vector stores and embeddings",
+ "array": [
+ {
+ "label": "Vector Store",
+ "name": "vectorStore",
+ "type": "asyncOptions",
+ "loadMethod": "listVectorStores",
+ "loadConfig": true
+ },
+ {
+ "label": "Embedding Model",
+ "name": "embeddingModel",
+ "type": "asyncOptions",
+ "loadMethod": "listEmbeddings",
+ "loadConfig": true
+ },
+ {
+ "label": "Knowledge Name",
+ "name": "knowledgeName",
+ "type": "string",
+ "placeholder": "A short name for the knowledge base, this is useful for the AI to know when and how to search for correct information"
+ },
+ {
+ "label": "Describe Knowledge",
+ "name": "knowledgeDescription",
+ "type": "string",
+ "placeholder": "Describe what the knowledge base is about, this is useful for the AI to know when and how to search for correct information",
+ "rows": 4
+ },
+ {
+ "label": "Return Source Documents",
+ "name": "returnSourceDocuments",
+ "type": "boolean",
+ "optional": true
+ }
+ ],
+ "optional": true,
+ "id": "agentAgentflow_0-input-agentKnowledgeVSEmbeddings-array",
+ "display": true
+ },
+ {
+ "label": "Enable Memory",
+ "name": "agentEnableMemory",
+ "type": "boolean",
+ "description": "Enable memory for the conversation thread",
+ "default": true,
+ "optional": true,
+ "id": "agentAgentflow_0-input-agentEnableMemory-boolean",
+ "display": true
+ },
+ {
+ "label": "Memory Type",
+ "name": "agentMemoryType",
+ "type": "options",
+ "options": [
+ {
+ "label": "All Messages",
+ "name": "allMessages",
+ "description": "Retrieve all messages from the conversation"
+ },
+ {
+ "label": "Window Size",
+ "name": "windowSize",
+ "description": "Uses a fixed window size to surface the last N messages"
+ },
+ {
+ "label": "Conversation Summary",
+ "name": "conversationSummary",
+ "description": "Summarizes the whole conversation"
+ },
+ {
+ "label": "Conversation Summary Buffer",
+ "name": "conversationSummaryBuffer",
+ "description": "Summarize conversations once token limit is reached. Default to 2000"
+ }
+ ],
+ "optional": true,
+ "default": "allMessages",
+ "show": {
+ "agentEnableMemory": true
+ },
+ "id": "agentAgentflow_0-input-agentMemoryType-options",
+ "display": true
+ },
+ {
+ "label": "Window Size",
+ "name": "agentMemoryWindowSize",
+ "type": "number",
+ "default": "20",
+ "description": "Uses a fixed window size to surface the last N messages",
+ "show": {
+ "agentMemoryType": "windowSize"
+ },
+ "id": "agentAgentflow_0-input-agentMemoryWindowSize-number",
+ "display": false
+ },
+ {
+ "label": "Max Token Limit",
+ "name": "agentMemoryMaxTokenLimit",
+ "type": "number",
+ "default": "2000",
+ "description": "Summarize conversations once token limit is reached. Default to 2000",
+ "show": {
+ "agentMemoryType": "conversationSummaryBuffer"
+ },
+ "id": "agentAgentflow_0-input-agentMemoryMaxTokenLimit-number",
+ "display": false
+ },
+ {
+ "label": "Input Message",
+ "name": "agentUserMessage",
+ "type": "string",
+ "description": "Add an input message as user message at the end of the conversation",
+ "rows": 4,
+ "optional": true,
+ "acceptVariable": true,
+ "show": {
+ "agentEnableMemory": true
+ },
+ "id": "agentAgentflow_0-input-agentUserMessage-string",
+ "display": true
+ },
+ {
+ "label": "Return Response As",
+ "name": "agentReturnResponseAs",
+ "type": "options",
+ "options": [
+ {
+ "label": "User Message",
+ "name": "userMessage"
+ },
+ {
+ "label": "Assistant Message",
+ "name": "assistantMessage"
+ }
+ ],
+ "default": "userMessage",
+ "id": "agentAgentflow_0-input-agentReturnResponseAs-options",
+ "display": true
+ },
+ {
+ "label": "JSON Structured Output",
+ "name": "agentStructuredOutput",
+ "description": "Instruct the Agent to give output in a JSON structured schema",
+ "type": "array",
+ "optional": true,
+ "acceptVariable": true,
+ "array": [
+ {
+ "label": "Key",
+ "name": "key",
+ "type": "string"
+ },
+ {
+ "label": "Type",
+ "name": "type",
+ "type": "options",
+ "options": [
+ {
+ "label": "String",
+ "name": "string"
+ },
+ {
+ "label": "String Array",
+ "name": "stringArray"
+ },
+ {
+ "label": "Number",
+ "name": "number"
+ },
+ {
+ "label": "Boolean",
+ "name": "boolean"
+ },
+ {
+ "label": "Enum",
+ "name": "enum"
+ },
+ {
+ "label": "JSON Array",
+ "name": "jsonArray"
+ }
+ ]
+ },
+ {
+ "label": "Enum Values",
+ "name": "enumValues",
+ "type": "string",
+ "placeholder": "value1, value2, value3",
+ "description": "Enum values. Separated by comma",
+ "optional": true,
+ "show": {
+ "agentStructuredOutput[$index].type": "enum"
+ }
+ },
+ {
+ "label": "JSON Schema",
+ "name": "jsonSchema",
+ "type": "code",
+ "placeholder": "{\n \"answer\": {\n \"type\": \"string\",\n \"description\": \"Value of the answer\"\n },\n \"reason\": {\n \"type\": \"string\",\n \"description\": \"Reason for the answer\"\n },\n \"optional\": {\n \"type\": \"boolean\"\n },\n \"count\": {\n \"type\": \"number\"\n },\n \"children\": {\n \"type\": \"array\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"value\": {\n \"type\": \"string\",\n \"description\": \"Value of the children's answer\"\n }\n }\n }\n }\n}",
+ "description": "JSON schema for the structured output",
+ "optional": true,
+ "hideCodeExecute": true,
+ "show": {
+ "agentStructuredOutput[$index].type": "jsonArray"
+ }
+ },
+ {
+ "label": "Description",
+ "name": "description",
+ "type": "string",
+ "placeholder": "Description of the key"
+ }
+ ],
+ "id": "agentAgentflow_0-input-agentStructuredOutput-array",
+ "display": true
+ },
+ {
+ "label": "Update Flow State",
+ "name": "agentUpdateState",
+ "description": "Update runtime state during the execution of the workflow",
+ "type": "array",
+ "optional": true,
+ "acceptVariable": true,
+ "array": [
+ {
+ "label": "Key",
+ "name": "key",
+ "type": "asyncOptions",
+ "loadMethod": "listRuntimeStateKeys"
+ },
+ {
+ "label": "Value",
+ "name": "value",
+ "type": "string",
+ "acceptVariable": true,
+ "acceptNodeOutputAsVariable": true
+ }
+ ],
+ "id": "agentAgentflow_0-input-agentUpdateState-array",
+ "display": true
+ }
+ ],
+ "inputAnchors": [],
+ "inputs": {
+ "agentModel": "chatOpenAICustom",
+ "agentMessages": [
+ {
+ "role": "system",
+ "content": "<instructions>
You are a customer service agent that helps the user according to the <policy> provided below.
============================================================
SECTION 1 — GENERIC EXECUTE_PYTHON INSTRUCTIONS
============================================================
HOW YOU WORK:
You have ONE tool: execute_python. It runs Python code in a sandbox.
Inside that sandbox, an actions object is pre-loaded with methods you call to perform operations (look up records, search options, modify state, etc.).
The full list of available actions and their parameters is documented in the execute_python tool description — READ IT before writing code.
TURN STRUCTURE (STRICT):
In each turn you must do EXACTLY ONE of the following — never both:
A) Send a text message to the user, OR
B) Make an execute_python call.
You MUST NOT combine a message and a tool call in the same turn.
In particular, if execute_python returned an error, your next turn must be ONLY an execute_python call with fixed code — no text, no apology, no explanation.
CRITICAL — WRITE COMPLETE SCRIPTS:
A single execute_python call can contain MANY actions.* calls chained together in one script.
You MUST combine all related steps into ONE execute_python call.
Do NOT make separate execute_python calls for each individual action.
Think about what information you need, then write ONE script that gathers and processes ALL of it.
Only make a second execute_python call if the first one fails or if you need user input before continuing.
SANDBOX ENVIRONMENT:
Your code runs in a restricted Python sandbox. These constraints apply:
- ALLOWED IMPORTS: json, math, datetime, time, re, collections, itertools — nothing else
- NO file I/O: open(), os, sys, subprocess, pathlib are all blocked
- NO exec(), eval(), compile()
- NO dunder attributes: name, class, dict etc. are blocked
- NO input(): extract information from the conversation instead
- The actions object is ALREADY AVAILABLE — do not import it
CRITICAL INPUT BAN (HIGHEST PRIORITY):
- NEVER use input() in execute_python code.
- NEVER use placeholder variables such as input, Input, or user_input.
- NEVER write code like email = input(...) or value = Input.
- If any value is missing, ask the user in a normal assistant message (not in Python code).
- In execute_python code, only use values already present in conversation/tool outputs.
- Any script using input() is invalid and must be rewritten before execution.
CODE GENERATION RULES:
1. Treat actions.* outputs by TYPE:
- If output is structured data (object/list), parse with json.loads() before field access.
- If output is a scalar (e.g., identifier/status string), use it directly.
Safe pattern:
import json
raw = actions.ANY_METHOD(args)
# parse when raw looks like structured JSON; otherwise use raw directly
data = json.loads(raw) if isinstance(raw, str) and raw.lstrip().startswith('[') else raw
# for JSON objects, use the same idea: parse when output is structured JSON text
❌ WRONG — accessing fields on the raw JSON string:
result = actions.some_write_method(...)
print(result['status']) # CRASH: result is a STRING, not a dict
✅ CORRECT — parse first, then access:
result = json.loads(actions.some_write_method(...))
print(result['status'])
2. ALWAYS print results — print() is the ONLY way to see output:
print(data)
3. DICT vs LIST — read the signature line for each action:
Many actions return DICTS (not lists). Iterating a dict gives you KEYS (strings), not objects.
❌ WRONG — treats dict as list of objects:
for item in data:
print(item['name']) # CRASH: item is a string key, not a dict
✅ CORRECT — use .items() for dicts:
for key, value in data.items():
print(key, value)
When unsure, print the data first: print(type(data), data)
4. STATELESS: Variables do NOT persist between execute_python calls.
Put ALL steps in ONE script.
5. NEVER fabricate identifiers or option values.
Extract concrete values from tool outputs and reuse them exactly.
Never pass placeholder tokens like \"user_id\", \"order_id\", \"item_id\", \"payment_method_id\"
as actual values. Those are parameter NAMES, not real values.
6. PRE-FLIGHT CHECKLIST before any state-changing action:
Before calling any write action, verify all required arguments come from current
data in THIS script and satisfy preconditions.
Generic checks:
- Every argument variable is defined before use (no undefined names).
- No input()/Input/user_input usage anywhere in the script.
- Entities referenced by the action are confirmed to exist in retrieved data.
- Current state allows the action (e.g., status/eligibility/business constraints).
- Argument values are concrete runtime values (not placeholder strings).
Pattern:
# 1) Read current state
state = json.loads(actions.some_read_method(...))
print(state)
# 2) Validate preconditions and required values
# (extract concrete values from state; do not invent placeholders)
# 3) Execute write action only after checks
result = json.loads(actions.some_write_method(...))
print(result)
7. LOOKUP + EXECUTION GUARDRAILS (GENERIC):
A) LOOKUP FALLBACK (max 2 tries):
- Try one primary lookup strategy.
- If it fails, try one alternative strategy.
- If both fail, DO NOT guess values and DO NOT run write actions.
Ask the user for the missing value(s) in a normal assistant message.
B) NEVER USE PLACEHOLDER LITERALS AS REAL VALUES:
Parameter names are not runtime values.
Forbidden as actual values: \"user_id\", \"order_id\", \"item_id\", \"product_id\",
\"payment_method_id\", \"email\", \"first_name\", \"last_name\", \"zip\", etc.
C) READ-THEN-WRITE DISCIPLINE:
Before any write action, explicitly read current state and confirm:
- target entity exists
- operation is allowed in current state
- selected replacement/option/value is valid in current data
D) ARGUMENT SANITY CHECKS BEFORE WRITE:
For each required argument:
- it is defined in this script
- it is non-empty and concrete (not placeholder text)
- it was derived from conversation/tool output
Print chosen argument values before the write call.
E) TWO-PHASE SCRIPT TEMPLATE:
- Phase 1: gather and print all required facts
- Phase 2: execute write action only if checks pass
8. MANDATORY PRE-EXECUTION SELF-CHECK (RUN THIS MENTALLY BEFORE EVERY execute_python CALL):
- No input(), input, Input, or user_input anywhere in code.
- No placeholder literals used as real values (\"user_id\", \"order_id\", etc.).
- Every variable used in arguments is defined in THIS script.
- For every critical value, print it before write actions.
- If a lookup failed twice, DO NOT continue with write actions; ask user for corrected value.
9. STRICT LOOKUP FAILURE RULE:
If authentication/lookup fails (target entity not found), you MUST stop mutation flow.
Do not call write actions after unresolved lookup failure.
Your next action is either:
- one alternative lookup attempt, OR
- ask the user for corrected identifying information.
SILENT ERROR RECOVERY (MANDATORY):
When execute_python returns an error, you MUST:
1. NOT produce any text message — no apology, no explanation, no \"let me try again\"
2. Immediately make another execute_python call with corrected code
3. The user must NEVER know an error occurred
This means your very next turn after an error is ONLY a tool call — zero words to the user.
❌ WRONG: \"I apologize for the error. Let me try a different approach.\" + execute_python
❌ WRONG: \"Let me fix that.\" + execute_python
❌ WRONG: Any text at all before or alongside the retry
✅ CORRECT: execute_python (with fixed code, no accompanying text)
Only speak to the user when you have a successful result or need information from them.
If after 3 retries you still cannot complete the task, you may say there is a temporary technical issue — but never mention implementation details.
ANTI-LOOP GUARD (GENERIC):
- If you hit the same error class twice in a row (e.g., repeated \"not found\" or repeated type/index error),
stop retrying variations and switch strategy:
1) one alternative lookup/validation path, then
2) ask user for the missing/corrected value if still unresolved.
- Do not burn steps by repeating near-identical failing scripts.
============================================================
SECTION 2 — USE-CASE SPECIFIC EXAMPLES (STOCKS)
============================================================
CRITICAL — WRITE COMPLETE SCRIPTS EXAMPLE:
❌ WRONG (multiple execute_python calls, one action each):
Turn 1: execute_python → actions.find_account_id_by_email(\"jane@example.com\")
Turn 2: execute_python → actions.get_account_summary(\"acc_jane_001\")
Turn 3: execute_python → actions.place_market_buy(\"acc_jane_001\", \"AAPL\", 5)
✅ CORRECT (one execute_python call with a complete script):
execute_python →
import json
account_id = actions.find_account_id_by_email(\"jane.miller@example.com\")
print(f\"Account ID: {account_id}\")
account = json.loads(actions.get_account_summary(account_id))
print(f\"Cash: {account['cash_balance']}\")
quote = json.loads(actions.get_quote(\"AAPL\"))
print(f\"AAPL price: {quote['current_price']}\")
STOCKS STATE TRUTH RULE (MANDATORY):
- Never report balances, positions, order status, or order history from memory.
- Every factual state claim must come from tool output retrieved in the SAME execute_python script.
- Before replying with account/order state, re-read from DB using:
get_account_summary, get_portfolio, and/or get_order_history.
STOCKS IDENTIFIER DISCIPLINE:
- Never invent account IDs, order IDs, symbols, prices, or quantities.
- Account actions must start with account identification (usually by email).
- Trade symbols must come from user request + list_available_symbols() / get_quote() validation.
- For cancel requests, verify order ownership and current order status before mutation.
STOCKS API NAME DISCIPLINE (MANDATORY):
- Use only stocks actions listed in the current API REFERENCE.
- Allowed account + market read methods:
actions.find_account_id_by_email, actions.get_account_summary, actions.get_portfolio,
actions.get_quote, actions.list_available_symbols, actions.list_market_movers,
actions.get_order_history.
- Allowed trading/write methods:
actions.place_market_buy, actions.place_market_sell,
actions.place_limit_buy, actions.place_limit_sell, actions.cancel_open_order.
- Allowed utility methods:
actions.calculate, actions.transfer_to_human_agents.
- If a method name is not in this stocks allowlist or current stocks API REFERENCE, do not call it.
FIRST AUTH TURN TEMPLATE (STOCKS):
- After user provides email, the first auth script must use:
1) account_id = actions.find_account_id_by_email(email)
2) account = json.loads(actions.get_account_summary(account_id))
3) portfolio = json.loads(actions.get_portfolio(account_id))
4) print all three outputs
- Do not use any user_id variable in stocks workflows.
AUTHENTICATION GATE (MANDATORY):
- Never claim the user is authenticated before BOTH conditions are true:
1) user has explicitly provided an email in conversation, and
2) find_account_id_by_email(email) succeeded in execute_python.
- If email is missing, ask for email and do NOT run account/trading tools.
- If lookup fails, do NOT claim success; ask for corrected email.
- Do not mention account name, balances, holdings, or order history until auth succeeds.
- In the first successful auth response, include the exact authenticated account_id from tool output.
MUTATION EXECUTION CONTRACT (MANDATORY):
- For mutation intents (place_market_buy, place_market_sell, place_limit_buy,
place_limit_sell, cancel_open_order):
1) first ask for confirmation,
2) after user confirms (\"yes\"), your NEXT turn must be an execute_python call that performs the mutation.
- Do not send a text-only \"success\" message before that tool call occurs.
- Never claim an order was placed/cancelled unless tool output in that turn contains concrete mutation evidence
(for example order_id, status, and order details).
- If the tool call fails, do not pretend success; follow retry/reconciliation rules.
STOCKS TRADE PRECHECKS (MANDATORY):
Before any buy/sell/cancel call, gather and print:
1) account_id and current cash balance / holdings
2) symbol and live quote used for decision
3) quantity and order type parameters
4) for limits: selected limit price and relation to current market price
If any required value is missing or invalid, ask the user rather than guessing.
POST-WRITE VERIFICATION (MANDATORY):
- After any mutation (place_* / cancel_open_order), in the SAME execute_python script:
1) print the mutation result,
2) re-read order history and print the affected order ID + status,
3) re-read account summary and portfolio and print updated cash/position values.
- Do not claim success unless these verification reads match the claim.
MISMATCH RECONCILIATION RULE:
- If user says the result is wrong/missing, do not apologize repeatedly and do not guess.
- Run one reconciliation script that re-fetches:
get_account_summary, get_portfolio, and get_order_history.
- Report exactly what is in DB now.
- If mismatch persists after one reconciliation attempt, perform one corrective write attempt if valid.
- Transfer to human only if corrective attempt is impossible or fails with a concrete tool limitation.
TRANSFER GATE (STRICT):
- Do NOT transfer solely due to temporary execution errors or uncertainty.
- Transfer only when:
1) request is out of policy, or
2) required capability is unavailable, confirmed by concrete tool error after retries.
TRANSFER TO HUMAN AGENT:
To transfer, make an execute_python call with code: actions.transfer_to_human_agents(\"summary of the issue\"), then send the message 'YOU ARE BEING TRANSFERRED TO A HUMAN AGENT. PLEASE HOLD ON.' to the user.
</instructions>
<policy>
# Stocks trading agent policy
As a stocks trading support agent, you can help users:
- authenticate trading account via email lookup
- review account profile, cash balance, holdings, quotes, and order history
- place market and limit buy/sell orders
- cancel open orders
- provide neutral factual market data from available tools
At the beginning of the conversation, authenticate the user account before any trade action.
Authentication is valid only when BOTH are true:
- the user explicitly provided an email in conversation, and
- a tool call to find_account_id_by_email succeeds.
You must never claim authentication success before those two conditions are met.
If email is missing, ask for email first.
If lookup fails, ask for corrected email and do not proceed to account/trading operations.
You can only operate on one authenticated account per conversation and must deny requests to access or trade another account.
Before taking any action that updates the database (place/cancel orders), list action details and obtain explicit user confirmation (yes) to proceed.
After user confirmation for a write action, you must execute the write via tool call before any success statement.
Do not provide text-only completion claims for place/cancel operations.
All account/order facts you present (cash, positions, order status/history) must come from current tool output, not memory.
Before presenting state to the user, re-read the relevant data from tools in the same execution flow.
Do not provide financial advice, predictions, or recommendations. Only provide factual tool-backed information.
Do not fabricate symbols, account ids, order ids, prices, quantities, balances, or execution outcomes.
After each successful mutation (place/cancel order), you must verify by re-reading:
- order history (including affected order id + status), and
- account summary / portfolio (cash + position updates).
Do not claim success unless verification output matches the claim.
Any claimed order id, status, balance, or position update must be directly traceable to current tool output.
You should at most make one tool call at a time, and if you take a tool call, you should not respond to the user at the same time. If you respond to the user, you should not make a tool call at the same time.
You should deny requests that violate policy or require unavailable capabilities.
Transfer to a human agent if and only if the request cannot be handled with available actions.
Do not transfer solely due to uncertainty or temporary execution issues.
Transfer only for:
- out-of-policy requests, or
- confirmed tool capability limitations after retry/reconciliation.
If user reports a mismatch, run one reconciliation by re-reading account summary, portfolio, and order history and report what the DB currently shows.
Only after reconciliation and (if applicable) one valid corrective attempt may you transfer.
To transfer, first call transfer_to_human_agents, then send: 'YOU ARE BEING TRANSFERRED TO A HUMAN AGENT. PLEASE HOLD ON.'
## Domain basics
- All prices are in USD.
- Market quote values are demo values from the local tools database.
- A market order executes immediately at the current quote price.
- A limit order is filled immediately only when its condition is met; otherwise it remains open.
- Cancelling is allowed only for open orders.
- Positions track quantity and average cost per symbol.
- Cash balance and positions are updated only by successful trade mutations.
</policy>
"
+ }
+ ],
+ "agentToolsBuiltInOpenAI": "",
+ "agentTools": [
+ {
+ "agentSelectedTool": "customMCP",
+ "agentSelectedToolRequiresHumanInput": false,
+ "agentSelectedToolConfig": {
+ "mcpServerConfig": "{\n \"url\": \"http://172.17.0.1:5051/sse\",\n \"transport\": \"sse\"\n}",
+ "mcpActions": "[\"execute_python\",\"list_available_actions\"]",
+ "agentSelectedTool": "customMCP"
+ }
+ }
+ ],
+ "agentKnowledgeDocumentStores": "",
+ "agentKnowledgeVSEmbeddings": "",
+ "agentEnableMemory": true,
+ "agentMemoryType": "allMessages",
+ "agentUserMessage": "",
+ "agentReturnResponseAs": "userMessage",
+ "agentStructuredOutput": "",
+ "agentUpdateState": "",
+ "agentModelConfig": {
+ "cache": "",
+ "modelName": "Qwen/Qwen3-Coder-30B-A3B-Instruct",
+ "temperature": "0.0",
+ "streaming": true,
+ "maxTokens": "",
+ "topP": "",
+ "frequencyPenalty": "",
+ "presencePenalty": "",
+ "timeout": "",
+ "basepath": "http://172.17.0.1:8002/v1",
+ "baseOptions": "",
+ "agentModel": "chatOpenAICustom"
+ },
+ "undefined": ""
+ },
+ "outputAnchors": [
+ {
+ "id": "agentAgentflow_0-output-agentAgentflow",
+ "label": "Agent",
+ "name": "agentAgentflow"
+ }
+ ],
+ "outputs": {},
+ "selected": false
+ },
+ "type": "agentFlow",
+ "width": 320,
+ "height": 100,
+ "selected": false,
+ "dragging": false,
+ "positionAbsolute": {
+ "x": 143,
+ "y": 99.25
+ }
+ },
+ {
+ "id": "toolAgentflow_0",
+ "position": {
+ "x": -42.5,
+ "y": 116
+ },
+ "data": {
+ "id": "toolAgentflow_0",
+ "label": "Tool 0",
+ "version": 1.2,
+ "name": "toolAgentflow",
+ "type": "Tool",
+ "color": "#d4a373",
+ "baseClasses": [
+ "Tool"
+ ],
+ "category": "Agent Flows",
+ "description": "Tools allow LLM to interact with external systems",
+ "inputParams": [
+ {
+ "label": "Tool",
+ "name": "toolAgentflowSelectedTool",
+ "type": "asyncOptions",
+ "loadMethod": "listTools",
+ "loadConfig": true,
+ "id": "toolAgentflow_0-input-toolAgentflowSelectedTool-asyncOptions",
+ "display": true
+ },
+ {
+ "label": "Tool Input Arguments",
+ "name": "toolInputArgs",
+ "type": "array",
+ "acceptVariable": true,
+ "refresh": true,
+ "array": [
+ {
+ "label": "Input Argument Name",
+ "name": "inputArgName",
+ "type": "asyncOptions",
+ "loadMethod": "listToolInputArgs",
+ "refresh": true
+ },
+ {
+ "label": "Input Argument Value",
+ "name": "inputArgValue",
+ "type": "string",
+ "acceptVariable": true
+ }
+ ],
+ "show": {
+ "toolAgentflowSelectedTool": ".+"
+ },
+ "id": "toolAgentflow_0-input-toolInputArgs-array",
+ "display": true
+ },
+ {
+ "label": "Update Flow State",
+ "name": "toolUpdateState",
+ "description": "Update runtime state during the execution of the workflow",
+ "type": "array",
+ "optional": true,
+ "acceptVariable": true,
+ "array": [
+ {
+ "label": "Key",
+ "name": "key",
+ "type": "asyncOptions",
+ "loadMethod": "listRuntimeStateKeys"
+ },
+ {
+ "label": "Value",
+ "name": "value",
+ "type": "string",
+ "acceptVariable": true,
+ "acceptNodeOutputAsVariable": true
+ }
+ ],
+ "id": "toolAgentflow_0-input-toolUpdateState-array",
+ "display": true
+ }
+ ],
+ "inputAnchors": [],
+ "inputs": {
+ "toolAgentflowSelectedTool": "customMCP",
+ "toolInputArgs": [
+ {
+ "inputArgName": "sessionId",
+ "inputArgValue": "{{ $flow.sessionId }}
"
+ },
+ {
+ "inputArgName": "chatId",
+ "inputArgValue": "{{ $flow.chatId }}
"
+ }
+ ],
+ "toolUpdateState": "",
+ "toolAgentflowSelectedToolConfig": {
+ "mcpServerConfig": "{\n \"url\": \"http://172.17.0.1:5051/sse\",\n \"transport\": \"sse\"\n}",
+ "mcpActions": "[\"get_session_id\"]",
+ "toolAgentflowSelectedTool": "customMCP"
+ }
+ },
+ "outputAnchors": [
+ {
+ "id": "toolAgentflow_0-output-toolAgentflow",
+ "label": "Tool",
+ "name": "toolAgentflow"
+ }
+ ],
+ "outputs": {},
+ "selected": false
+ },
+ "type": "agentFlow",
+ "width": 112,
+ "height": 68,
+ "selected": false,
+ "positionAbsolute": {
+ "x": -42.5,
+ "y": 116
+ },
+ "dragging": false
+ }
+ ],
+ "edges": [
+ {
+ "source": "startAgentflow_0",
+ "sourceHandle": "startAgentflow_0-output-startAgentflow",
+ "target": "toolAgentflow_0",
+ "targetHandle": "toolAgentflow_0",
+ "data": {
+ "sourceColor": "#7EE787",
+ "targetColor": "#d4a373",
+ "isHumanInput": false
+ },
+ "type": "agentFlow",
+ "id": "startAgentflow_0-startAgentflow_0-output-startAgentflow-toolAgentflow_0-toolAgentflow_0"
+ },
+ {
+ "source": "toolAgentflow_0",
+ "sourceHandle": "toolAgentflow_0-output-toolAgentflow",
+ "target": "agentAgentflow_0",
+ "targetHandle": "agentAgentflow_0",
+ "data": {
+ "sourceColor": "#d4a373",
+ "targetColor": "#4DD0E1",
+ "isHumanInput": false
+ },
+ "type": "agentFlow",
+ "id": "toolAgentflow_0-toolAgentflow_0-output-toolAgentflow-agentAgentflow_0-agentAgentflow_0"
+ }
+ ]
+}
\ No newline at end of file
diff --git a/sample_solutions/AgenticCodeExecution/Flowise/agentflow_code_execution_triage.json b/sample_solutions/AgenticCodeExecution/Flowise/agentflow_code_execution_triage.json
new file mode 100644
index 00000000..deaa988f
--- /dev/null
+++ b/sample_solutions/AgenticCodeExecution/Flowise/agentflow_code_execution_triage.json
@@ -0,0 +1,868 @@
+{
+ "nodes": [
+ {
+ "id": "startAgentflow_0",
+ "type": "agentFlow",
+ "position": {
+ "x": -218.5,
+ "y": 116
+ },
+ "data": {
+ "id": "startAgentflow_0",
+ "label": "Start",
+ "version": 1.1,
+ "name": "startAgentflow",
+ "type": "Start",
+ "color": "#7EE787",
+ "hideInput": true,
+ "baseClasses": [
+ "Start"
+ ],
+ "category": "Agent Flows",
+ "description": "Starting point of the agentflow",
+ "inputParams": [
+ {
+ "label": "Input Type",
+ "name": "startInputType",
+ "type": "options",
+ "options": [
+ {
+ "label": "Chat Input",
+ "name": "chatInput",
+ "description": "Start the conversation with chat input"
+ },
+ {
+ "label": "Form Input",
+ "name": "formInput",
+ "description": "Start the workflow with form inputs"
+ }
+ ],
+ "default": "chatInput",
+ "id": "startAgentflow_0-input-startInputType-options",
+ "display": true
+ },
+ {
+ "label": "Form Title",
+ "name": "formTitle",
+ "type": "string",
+ "placeholder": "Please Fill Out The Form",
+ "show": {
+ "startInputType": "formInput"
+ },
+ "id": "startAgentflow_0-input-formTitle-string",
+ "display": false
+ },
+ {
+ "label": "Form Description",
+ "name": "formDescription",
+ "type": "string",
+ "placeholder": "Complete all fields below to continue",
+ "show": {
+ "startInputType": "formInput"
+ },
+ "id": "startAgentflow_0-input-formDescription-string",
+ "display": false
+ },
+ {
+ "label": "Form Input Types",
+ "name": "formInputTypes",
+ "description": "Specify the type of form input",
+ "type": "array",
+ "show": {
+ "startInputType": "formInput"
+ },
+ "array": [
+ {
+ "label": "Type",
+ "name": "type",
+ "type": "options",
+ "options": [
+ {
+ "label": "String",
+ "name": "string"
+ },
+ {
+ "label": "Number",
+ "name": "number"
+ },
+ {
+ "label": "Boolean",
+ "name": "boolean"
+ },
+ {
+ "label": "Options",
+ "name": "options"
+ }
+ ],
+ "default": "string"
+ },
+ {
+ "label": "Label",
+ "name": "label",
+ "type": "string",
+ "placeholder": "Label for the input"
+ },
+ {
+ "label": "Variable Name",
+ "name": "name",
+ "type": "string",
+ "placeholder": "Variable name for the input (must be camel case)",
+ "description": "Variable name must be camel case. For example: firstName, lastName, etc."
+ },
+ {
+ "label": "Add Options",
+ "name": "addOptions",
+ "type": "array",
+ "show": {
+ "formInputTypes[$index].type": "options"
+ },
+ "array": [
+ {
+ "label": "Option",
+ "name": "option",
+ "type": "string"
+ }
+ ]
+ }
+ ],
+ "id": "startAgentflow_0-input-formInputTypes-array",
+ "display": false
+ },
+ {
+ "label": "Ephemeral Memory",
+ "name": "startEphemeralMemory",
+ "type": "boolean",
+ "description": "Start fresh for every execution without past chat history",
+ "optional": true,
+ "id": "startAgentflow_0-input-startEphemeralMemory-boolean",
+ "display": true
+ },
+ {
+ "label": "Flow State",
+ "name": "startState",
+ "description": "Runtime state during the execution of the workflow",
+ "type": "array",
+ "optional": true,
+ "array": [
+ {
+ "label": "Key",
+ "name": "key",
+ "type": "string",
+ "placeholder": "Foo"
+ },
+ {
+ "label": "Value",
+ "name": "value",
+ "type": "string",
+ "placeholder": "Bar",
+ "optional": true
+ }
+ ],
+ "id": "startAgentflow_0-input-startState-array",
+ "display": true
+ },
+ {
+ "label": "Persist State",
+ "name": "startPersistState",
+ "type": "boolean",
+ "description": "Persist the state in the same session",
+ "optional": true,
+ "id": "startAgentflow_0-input-startPersistState-boolean",
+ "display": true
+ }
+ ],
+ "inputAnchors": [],
+ "inputs": {
+ "startInputType": "chatInput",
+ "formTitle": "",
+ "formDescription": "",
+ "formInputTypes": "",
+ "startEphemeralMemory": "",
+ "startState": "",
+ "startPersistState": ""
+ },
+ "outputAnchors": [
+ {
+ "id": "startAgentflow_0-output-startAgentflow",
+ "label": "Start",
+ "name": "startAgentflow"
+ }
+ ],
+ "outputs": {},
+ "selected": false
+ },
+ "width": 103,
+ "height": 66,
+ "positionAbsolute": {
+ "x": -218.5,
+ "y": 116
+ },
+ "selected": false,
+ "dragging": false
+ },
+ {
+ "id": "agentAgentflow_0",
+ "position": {
+ "x": 143,
+ "y": 99.25
+ },
+ "data": {
+ "id": "agentAgentflow_0",
+ "label": "Agent 0",
+ "version": 3.2,
+ "name": "agentAgentflow",
+ "type": "Agent",
+ "color": "#4DD0E1",
+ "baseClasses": [
+ "Agent"
+ ],
+ "category": "Agent Flows",
+ "description": "Dynamically choose and utilize tools during runtime, enabling multi-step reasoning",
+ "inputParams": [
+ {
+ "label": "Model",
+ "name": "agentModel",
+ "type": "asyncOptions",
+ "loadMethod": "listModels",
+ "loadConfig": true,
+ "id": "agentAgentflow_0-input-agentModel-asyncOptions",
+ "display": true
+ },
+ {
+ "label": "Messages",
+ "name": "agentMessages",
+ "type": "array",
+ "optional": true,
+ "acceptVariable": true,
+ "array": [
+ {
+ "label": "Role",
+ "name": "role",
+ "type": "options",
+ "options": [
+ {
+ "label": "System",
+ "name": "system"
+ },
+ {
+ "label": "Assistant",
+ "name": "assistant"
+ },
+ {
+ "label": "Developer",
+ "name": "developer"
+ },
+ {
+ "label": "User",
+ "name": "user"
+ }
+ ]
+ },
+ {
+ "label": "Content",
+ "name": "content",
+ "type": "string",
+ "acceptVariable": true,
+ "generateInstruction": true,
+ "rows": 4
+ }
+ ],
+ "id": "agentAgentflow_0-input-agentMessages-array",
+ "display": true
+ },
+ {
+ "label": "OpenAI Built-in Tools",
+ "name": "agentToolsBuiltInOpenAI",
+ "type": "multiOptions",
+ "optional": true,
+ "options": [
+ {
+ "label": "Web Search",
+ "name": "web_search_preview",
+ "description": "Search the web for the latest information"
+ },
+ {
+ "label": "Code Interpreter",
+ "name": "code_interpreter",
+ "description": "Write and run Python code in a sandboxed environment"
+ },
+ {
+ "label": "Image Generation",
+ "name": "image_generation",
+ "description": "Generate images based on a text prompt"
+ }
+ ],
+ "show": {
+ "agentModel": "chatOpenAI"
+ },
+ "id": "agentAgentflow_0-input-agentToolsBuiltInOpenAI-multiOptions",
+ "display": true
+ },
+ {
+ "label": "Gemini Built-in Tools",
+ "name": "agentToolsBuiltInGemini",
+ "type": "multiOptions",
+ "optional": true,
+ "options": [
+ {
+ "label": "URL Context",
+ "name": "urlContext",
+ "description": "Extract content from given URLs"
+ },
+ {
+ "label": "Google Search",
+ "name": "googleSearch",
+ "description": "Search real-time web content"
+ },
+ {
+ "label": "Code Execution",
+ "name": "codeExecution",
+ "description": "Write and run Python code in a sandboxed environment"
+ }
+ ],
+ "show": {
+ "agentModel": "chatGoogleGenerativeAI"
+ },
+ "id": "agentAgentflow_0-input-agentToolsBuiltInGemini-multiOptions",
+ "display": false
+ },
+ {
+ "label": "Anthropic Built-in Tools",
+ "name": "agentToolsBuiltInAnthropic",
+ "type": "multiOptions",
+ "optional": true,
+ "options": [
+ {
+ "label": "Web Search",
+ "name": "web_search_20250305",
+ "description": "Search the web for the latest information"
+ },
+ {
+ "label": "Web Fetch",
+ "name": "web_fetch_20250910",
+ "description": "Retrieve full content from specified web pages"
+ }
+ ],
+ "show": {
+ "agentModel": "chatAnthropic"
+ },
+ "id": "agentAgentflow_0-input-agentToolsBuiltInAnthropic-multiOptions",
+ "display": false
+ },
+ {
+ "label": "Tools",
+ "name": "agentTools",
+ "type": "array",
+ "optional": true,
+ "array": [
+ {
+ "label": "Tool",
+ "name": "agentSelectedTool",
+ "type": "asyncOptions",
+ "loadMethod": "listTools",
+ "loadConfig": true
+ },
+ {
+ "label": "Require Human Input",
+ "name": "agentSelectedToolRequiresHumanInput",
+ "type": "boolean",
+ "optional": true
+ }
+ ],
+ "id": "agentAgentflow_0-input-agentTools-array",
+ "display": true
+ },
+ {
+ "label": "Knowledge (Document Stores)",
+ "name": "agentKnowledgeDocumentStores",
+ "type": "array",
+ "description": "Give your agent context about different document sources. Document stores must be upserted in advance.",
+ "array": [
+ {
+ "label": "Document Store",
+ "name": "documentStore",
+ "type": "asyncOptions",
+ "loadMethod": "listStores"
+ },
+ {
+ "label": "Describe Knowledge",
+ "name": "docStoreDescription",
+ "type": "string",
+ "generateDocStoreDescription": true,
+ "placeholder": "Describe what the knowledge base is about, this is useful for the AI to know when and how to search for correct information",
+ "rows": 4
+ },
+ {
+ "label": "Return Source Documents",
+ "name": "returnSourceDocuments",
+ "type": "boolean",
+ "optional": true
+ }
+ ],
+ "optional": true,
+ "id": "agentAgentflow_0-input-agentKnowledgeDocumentStores-array",
+ "display": true
+ },
+ {
+ "label": "Knowledge (Vector Embeddings)",
+ "name": "agentKnowledgeVSEmbeddings",
+ "type": "array",
+ "description": "Give your agent context about different document sources from existing vector stores and embeddings",
+ "array": [
+ {
+ "label": "Vector Store",
+ "name": "vectorStore",
+ "type": "asyncOptions",
+ "loadMethod": "listVectorStores",
+ "loadConfig": true
+ },
+ {
+ "label": "Embedding Model",
+ "name": "embeddingModel",
+ "type": "asyncOptions",
+ "loadMethod": "listEmbeddings",
+ "loadConfig": true
+ },
+ {
+ "label": "Knowledge Name",
+ "name": "knowledgeName",
+ "type": "string",
+ "placeholder": "A short name for the knowledge base, this is useful for the AI to know when and how to search for correct information"
+ },
+ {
+ "label": "Describe Knowledge",
+ "name": "knowledgeDescription",
+ "type": "string",
+ "placeholder": "Describe what the knowledge base is about, this is useful for the AI to know when and how to search for correct information",
+ "rows": 4
+ },
+ {
+ "label": "Return Source Documents",
+ "name": "returnSourceDocuments",
+ "type": "boolean",
+ "optional": true
+ }
+ ],
+ "optional": true,
+ "id": "agentAgentflow_0-input-agentKnowledgeVSEmbeddings-array",
+ "display": true
+ },
+ {
+ "label": "Enable Memory",
+ "name": "agentEnableMemory",
+ "type": "boolean",
+ "description": "Enable memory for the conversation thread",
+ "default": true,
+ "optional": true,
+ "id": "agentAgentflow_0-input-agentEnableMemory-boolean",
+ "display": true
+ },
+ {
+ "label": "Memory Type",
+ "name": "agentMemoryType",
+ "type": "options",
+ "options": [
+ {
+ "label": "All Messages",
+ "name": "allMessages",
+ "description": "Retrieve all messages from the conversation"
+ },
+ {
+ "label": "Window Size",
+ "name": "windowSize",
+ "description": "Uses a fixed window size to surface the last N messages"
+ },
+ {
+ "label": "Conversation Summary",
+ "name": "conversationSummary",
+ "description": "Summarizes the whole conversation"
+ },
+ {
+ "label": "Conversation Summary Buffer",
+ "name": "conversationSummaryBuffer",
+ "description": "Summarize conversations once token limit is reached. Default to 2000"
+ }
+ ],
+ "optional": true,
+ "default": "allMessages",
+ "show": {
+ "agentEnableMemory": true
+ },
+ "id": "agentAgentflow_0-input-agentMemoryType-options",
+ "display": true
+ },
+ {
+ "label": "Window Size",
+ "name": "agentMemoryWindowSize",
+ "type": "number",
+ "default": "20",
+ "description": "Uses a fixed window size to surface the last N messages",
+ "show": {
+ "agentMemoryType": "windowSize"
+ },
+ "id": "agentAgentflow_0-input-agentMemoryWindowSize-number",
+ "display": false
+ },
+ {
+ "label": "Max Token Limit",
+ "name": "agentMemoryMaxTokenLimit",
+ "type": "number",
+ "default": "2000",
+ "description": "Summarize conversations once token limit is reached. Default to 2000",
+ "show": {
+ "agentMemoryType": "conversationSummaryBuffer"
+ },
+ "id": "agentAgentflow_0-input-agentMemoryMaxTokenLimit-number",
+ "display": false
+ },
+ {
+ "label": "Input Message",
+ "name": "agentUserMessage",
+ "type": "string",
+ "description": "Add an input message as user message at the end of the conversation",
+ "rows": 4,
+ "optional": true,
+ "acceptVariable": true,
+ "show": {
+ "agentEnableMemory": true
+ },
+ "id": "agentAgentflow_0-input-agentUserMessage-string",
+ "display": true
+ },
+ {
+ "label": "Return Response As",
+ "name": "agentReturnResponseAs",
+ "type": "options",
+ "options": [
+ {
+ "label": "User Message",
+ "name": "userMessage"
+ },
+ {
+ "label": "Assistant Message",
+ "name": "assistantMessage"
+ }
+ ],
+ "default": "userMessage",
+ "id": "agentAgentflow_0-input-agentReturnResponseAs-options",
+ "display": true
+ },
+ {
+ "label": "JSON Structured Output",
+ "name": "agentStructuredOutput",
+ "description": "Instruct the Agent to give output in a JSON structured schema",
+ "type": "array",
+ "optional": true,
+ "acceptVariable": true,
+ "array": [
+ {
+ "label": "Key",
+ "name": "key",
+ "type": "string"
+ },
+ {
+ "label": "Type",
+ "name": "type",
+ "type": "options",
+ "options": [
+ {
+ "label": "String",
+ "name": "string"
+ },
+ {
+ "label": "String Array",
+ "name": "stringArray"
+ },
+ {
+ "label": "Number",
+ "name": "number"
+ },
+ {
+ "label": "Boolean",
+ "name": "boolean"
+ },
+ {
+ "label": "Enum",
+ "name": "enum"
+ },
+ {
+ "label": "JSON Array",
+ "name": "jsonArray"
+ }
+ ]
+ },
+ {
+ "label": "Enum Values",
+ "name": "enumValues",
+ "type": "string",
+ "placeholder": "value1, value2, value3",
+ "description": "Enum values. Separated by comma",
+ "optional": true,
+ "show": {
+ "agentStructuredOutput[$index].type": "enum"
+ }
+ },
+ {
+ "label": "JSON Schema",
+ "name": "jsonSchema",
+ "type": "code",
+ "placeholder": "{\n \"answer\": {\n \"type\": \"string\",\n \"description\": \"Value of the answer\"\n },\n \"reason\": {\n \"type\": \"string\",\n \"description\": \"Reason for the answer\"\n },\n \"optional\": {\n \"type\": \"boolean\"\n },\n \"count\": {\n \"type\": \"number\"\n },\n \"children\": {\n \"type\": \"array\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"value\": {\n \"type\": \"string\",\n \"description\": \"Value of the children's answer\"\n }\n }\n }\n }\n}",
+ "description": "JSON schema for the structured output",
+ "optional": true,
+ "hideCodeExecute": true,
+ "show": {
+ "agentStructuredOutput[$index].type": "jsonArray"
+ }
+ },
+ {
+ "label": "Description",
+ "name": "description",
+ "type": "string",
+ "placeholder": "Description of the key"
+ }
+ ],
+ "id": "agentAgentflow_0-input-agentStructuredOutput-array",
+ "display": true
+ },
+ {
+ "label": "Update Flow State",
+ "name": "agentUpdateState",
+ "description": "Update runtime state during the execution of the workflow",
+ "type": "array",
+ "optional": true,
+ "acceptVariable": true,
+ "array": [
+ {
+ "label": "Key",
+ "name": "key",
+ "type": "asyncOptions",
+ "loadMethod": "listRuntimeStateKeys"
+ },
+ {
+ "label": "Value",
+ "name": "value",
+ "type": "string",
+ "acceptVariable": true,
+ "acceptNodeOutputAsVariable": true
+ }
+ ],
+ "id": "agentAgentflow_0-input-agentUpdateState-array",
+ "display": true
+ }
+ ],
+ "inputAnchors": [],
+ "inputs": {
+ "agentModel": "chatOpenAICustom",
+ "agentMessages": [
+ {
+ "role": "system",
+ "content": "<instructions>\nYou are a customer service agent that helps the user according to the <policy> provided below.\n============================================================\nSECTION 1 — GENERIC EXECUTE_PYTHON INSTRUCTIONS\n============================================================\nHOW YOU WORK:\nYou have ONE tool: execute_python. It runs Python code in a sandbox.\nInside that sandbox, an actions object is pre-loaded with methods you call to perform operations (look up records, search options, modify state, etc.).\nThe full list of available actions and their parameters is documented in the execute_python tool description — READ IT before writing code.\nTURN STRUCTURE (STRICT):\nIn each turn you must do EXACTLY ONE of the following — never both:\nA) Send a text message to the user, OR\nB) Make an execute_python call.\nYou MUST NOT combine a message and a tool call in the same turn.\nIn particular, if execute_python returned an error, your next turn must be ONLY an execute_python call with fixed code — no text, no apology, no explanation.\nCRITICAL — WRITE COMPLETE SCRIPTS:\nA single execute_python call can contain MANY actions.* calls chained together in one script.\nYou MUST combine all related steps into ONE execute_python call.\nDo NOT make separate execute_python calls for each individual action.\nThink about what information you need, then write ONE script that gathers and processes ALL of it.\nOnly make a second execute_python call if the first one fails or if you need user input before continuing.\nSANDBOX ENVIRONMENT:\nYour code runs in a restricted Python sandbox. These constraints apply:\n- ALLOWED IMPORTS: json, math, datetime, time, re, collections, itertools — nothing else\n- NO file I/O: open(), os, sys, subprocess, pathlib are all blocked\n- NO exec(), eval(), compile()\n- NO dunder attributes: __name__, __class__, __dict__ etc. are blocked\n- NO input(): extract information from the conversation instead\n- The actions object is ALREADY AVAILABLE — do not import it\nCRITICAL INPUT BAN (HIGHEST PRIORITY):\n- NEVER use input() in execute_python code.\n- NEVER use placeholder variables such as input, Input, or user_input.\n- NEVER write code like email = input(...) or value = Input.\n- If any value is missing, ask the user in a normal assistant message (not in Python code).\n- In execute_python code, only use values already present in conversation/tool outputs.\n- Any script using input() is invalid and must be rewritten before execution.\nCODE GENERATION RULES:\n1. Treat actions.* outputs by TYPE:\n- If output is structured data (object/list), parse with json.loads() before field access.\n- If output is a scalar (e.g., identifier/status string), use it directly.\nSafe pattern:\nimport json\nraw = actions.ANY_METHOD(args)\n# parse when raw looks like structured JSON; otherwise use raw directly\ndata = json.loads(raw) if isinstance(raw, str) and raw.lstrip().startswith('[') else raw\n# for JSON objects, use the same idea: parse when output is structured JSON text\n❌ WRONG — accessing fields on the raw JSON string:\nresult = actions.some_write_method(...)\nprint(result['status']) # CRASH: result is a STRING, not a dict\n✅ CORRECT — parse first, then access:\nresult = json.loads(actions.some_write_method(...))\nprint(result['status'])\n2. ALWAYS print results — print() is the ONLY way to see output:\nprint(data)\n3. DICT vs LIST — read the signature line for each action:\nMany actions return DICTS (not lists). Iterating a dict gives you KEYS (strings), not objects.\n❌ WRONG — treats dict as list of objects:\nfor item in data:\nprint(item['name']) # CRASH: item is a string key, not a dict\n✅ CORRECT — use .items() for dicts:\nfor key, value in data.items():\nprint(key, value)\nWhen unsure, print the data first: print(type(data), data)\n4. STATELESS: Variables do NOT persist between execute_python calls.\nPut ALL steps in ONE script.\n5. NEVER fabricate identifiers or option values.\nExtract concrete values from tool outputs and reuse them exactly.\nNever pass placeholder tokens like \"user_id\", \"order_id\", \"item_id\", \"payment_method_id\"\nas actual values. Those are parameter NAMES, not real values.\n6. PRE-FLIGHT CHECKLIST before any state-changing action:\nBefore calling any write action, verify all required arguments come from current\ndata in THIS script and satisfy preconditions.\nGeneric checks:\n- Every argument variable is defined before use (no undefined names).\n- No input()/Input/user_input usage anywhere in the script.\n- Entities referenced by the action are confirmed to exist in retrieved data.\n- Current state allows the action (e.g., status/eligibility/business constraints).\n- Argument values are concrete runtime values (not placeholder strings).\nPattern:\n# 1) Read current state\nstate = json.loads(actions.some_read_method(...))\nprint(state)\n# 2) Validate preconditions and required values\n# (extract concrete values from state; do not invent placeholders)\n# 3) Execute write action only after checks\nresult = json.loads(actions.some_write_method(...))\nprint(result)\n7. LOOKUP + EXECUTION GUARDRAILS (GENERIC):\nA) LOOKUP FALLBACK (max 2 tries):\n- Try one primary lookup strategy.\n- If it fails, try one alternative strategy.\n- If both fail, DO NOT guess values and DO NOT run write actions.\nAsk the user for the missing value(s) in a normal assistant message.\nB) NEVER USE PLACEHOLDER LITERALS AS REAL VALUES:\nParameter names are not runtime values.\nForbidden as actual values: \"user_id\", \"order_id\", \"item_id\", \"product_id\",\n\"payment_method_id\", \"email\", \"first_name\", \"last_name\", \"zip\", etc.\nC) READ-THEN-WRITE DISCIPLINE:\nBefore any write action, explicitly read current state and confirm:\n- target entity exists\n- operation is allowed in current state\n- selected replacement/option/value is valid in current data\nD) ARGUMENT SANITY CHECKS BEFORE WRITE:\nFor each required argument:\n- it is defined in this script\n- it is non-empty and concrete (not placeholder text)\n- it was derived from conversation/tool output\nPrint chosen argument values before the write call.\nE) TWO-PHASE SCRIPT TEMPLATE:\n- Phase 1: gather and print all required facts\n- Phase 2: execute write action only if checks pass\n8. MANDATORY PRE-EXECUTION SELF-CHECK (RUN THIS MENTALLY BEFORE EVERY execute_python CALL):\n- No input(), input, Input, or user_input anywhere in code.\n- No placeholder literals used as real values (\"user_id\", \"order_id\", etc.).\n- Every variable used in arguments is defined in THIS script.\n- For every critical value, print it before write actions.\n- If a lookup failed twice, DO NOT continue with write actions; ask user for corrected value.\n9. STRICT LOOKUP FAILURE RULE:\nIf authentication/lookup fails (target entity not found), you MUST stop mutation flow.\nDo not call write actions after unresolved lookup failure.\nYour next action is either:\n- one alternative lookup attempt, OR\n- ask the user for corrected identifying information.\nSILENT ERROR RECOVERY (MANDATORY):\nWhen execute_python returns an error, you MUST:\n1. NOT produce any text message — no apology, no explanation, no \"let me try again\"\n2. Immediately make another execute_python call with corrected code\n3. The user must NEVER know an error occurred\nThis means your very next turn after an error is ONLY a tool call — zero words to the user.\n❌ WRONG: \"I apologize for the error. Let me try a different approach.\" + execute_python\n❌ WRONG: \"Let me fix that.\" + execute_python\n❌ WRONG: Any text at all before or alongside the retry\n✅ CORRECT: execute_python (with fixed code, no accompanying text)\nOnly speak to the user when you have a successful result or need information from them.\nIf after 3 retries you still cannot complete the task, you may say there is a temporary technical issue — but never mention implementation details.\nANTI-LOOP GUARD (GENERIC):\n- If you hit the same error class twice in a row (e.g., repeated \"not found\" or repeated type/index error),\nstop retrying variations and switch strategy:\n1) one alternative lookup/validation path, then\n2) ask user for the missing/corrected value if still unresolved.\n- Do not burn steps by repeating near-identical failing scripts.\n============================================================\nSECTION 2 — USE-CASE SPECIFIC EXAMPLES (INCIDENT TRIAGE)\n============================================================\nTRIAGE WORKFLOW (MANDATORY ORDER):\n1) Collect signals (HTTP, DNS, TCP/TLS, public status)\n2) Summarize severity + likely causes\n3) Produce a structured incident report\n4) Draft a customer-facing update\nCRITICAL — WRITE COMPLETE SCRIPTS EXAMPLE:\n❌ WRONG (splitting checks into many separate turns):\nTurn 1: execute_python → actions.check_http_endpoint(...)\nTurn 2: execute_python → actions.resolve_hostname(...)\nTurn 3: execute_python → actions.check_tcp_port(...)\n✅ CORRECT (single complete triage script):\nexecute_python →\nimport json\nhttp_raw = actions.check_http_endpoint(\"https://api.github.com\")\ndns_raw = actions.resolve_hostname(\"api.github.com\")\ntcp_raw = actions.check_tcp_port(\"api.github.com\", 443)\nstatus_raw = actions.get_public_status(\"github\")\nsummary_raw = actions.summarize_incident_signals(\nservice_name=\"github-api\",\nhttp_result_json=http_raw,\ntcp_result_json=tcp_raw,\npublic_status_json=status_raw,\nerror_text=\"elevated 5xx observed\"\n)\nprint(http_raw)\nprint(dns_raw)\nprint(tcp_raw)\nprint(status_raw)\nprint(summary_raw)\nTRIAGE FACTUALITY RULE:\n- Never claim outage/recovery without supporting tool evidence.\n- Every severity/cause claim must reference current tool outputs from this run.\n- If signals conflict, explicitly say \"inconclusive\" and gather one additional signal.\nSEVERITY DISCIPLINE:\n- Use low/medium/high/critical only.\n- If network path and HTTP both fail, treat as at least high.\n- If public status indicates major outage, escalate severity accordingly.\nTRANSFER TO HUMAN AGENT:\nTransfer only when the request is outside available capabilities or urgent human coordination is required.\nTo transfer, make an execute_python call with code: actions.transfer_to_human_agents(\"summary\"), then send the message 'YOU ARE BEING TRANSFERRED TO A HUMAN AGENT. PLEASE HOLD ON.' to the user.\n</instructions>\n<policy>\n# Incident triage agent policy\nAs an incident triage agent, you can help users:\n- check endpoint health and latency\n- resolve DNS and test TCP connectivity\n- inspect TLS certificate metadata\n- review public vendor status\n- summarize incident severity/causes\n- generate structured incident reports and customer updates\nYou must gather evidence before conclusions.\nDo not speculate or fabricate status details, timelines, or root causes.\nYou should provide factual, concise operational updates and actionable next steps.\nFor critical incidents, prioritize:\n1) impact statement,\n2) current status,\n3) immediate mitigation actions,\n4) next update ETA.\nDo not claim issue resolution unless verified by fresh checks.\nYou should at most make one tool call at a time, and if you take a tool call, you should not respond to the user at the same time. If you respond to the user, you should not make a tool call at the same time.\nYou should deny requests that violate policy or require unavailable capabilities.\nTransfer to a human agent if and only if the request cannot be handled with available actions. To transfer, first call transfer_to_human_agents, then send: 'YOU ARE BEING TRANSFERRED TO A HUMAN AGENT. PLEASE HOLD ON.'\n## Domain basics\n- Endpoint health is assessed from HTTP status, latency, and error details.\n- Network reachability is assessed from DNS resolution and TCP connectivity.\n- TLS inspection provides certificate metadata only, not full PKI diagnostics.\n- Public status APIs are informative but may lag real-time service conditions.\n- Incident severity should be revised as new evidence arrives.\n</policy>\n"
+ }
+ ],
+ "agentToolsBuiltInOpenAI": "",
+ "agentTools": [
+ {
+ "agentSelectedTool": "customMCP",
+ "agentSelectedToolRequiresHumanInput": false,
+ "agentSelectedToolConfig": {
+ "mcpServerConfig": "{\n \"url\": \"http://172.17.0.1:5051/sse\",\n \"transport\": \"sse\"\n}",
+ "mcpActions": "[\"execute_python\",\"list_available_actions\"]",
+ "agentSelectedTool": "customMCP"
+ }
+ }
+ ],
+ "agentKnowledgeDocumentStores": "",
+ "agentKnowledgeVSEmbeddings": "",
+ "agentEnableMemory": true,
+ "agentMemoryType": "allMessages",
+ "agentUserMessage": "",
+ "agentReturnResponseAs": "userMessage",
+ "agentStructuredOutput": "",
+ "agentUpdateState": "",
+ "agentModelConfig": {
+ "cache": "",
+ "modelName": "Qwen/Qwen3-Coder-30B-A3B-Instruct",
+ "temperature": "0.0",
+ "streaming": true,
+ "maxTokens": "",
+ "topP": "",
+ "frequencyPenalty": "",
+ "presencePenalty": "",
+ "timeout": "",
+ "basepath": "http://172.17.0.1:8002/v1",
+ "baseOptions": "",
+ "agentModel": "chatOpenAICustom"
+ },
+ "undefined": ""
+ },
+ "outputAnchors": [
+ {
+ "id": "agentAgentflow_0-output-agentAgentflow",
+ "label": "Agent",
+ "name": "agentAgentflow"
+ }
+ ],
+ "outputs": {},
+ "selected": false
+ },
+ "type": "agentFlow",
+ "width": 320,
+ "height": 100,
+ "selected": false,
+ "dragging": false,
+ "positionAbsolute": {
+ "x": 143,
+ "y": 99.25
+ }
+ },
+ {
+ "id": "toolAgentflow_0",
+ "position": {
+ "x": -42.5,
+ "y": 116
+ },
+ "data": {
+ "id": "toolAgentflow_0",
+ "label": "Tool 0",
+ "version": 1.2,
+ "name": "toolAgentflow",
+ "type": "Tool",
+ "color": "#d4a373",
+ "baseClasses": [
+ "Tool"
+ ],
+ "category": "Agent Flows",
+ "description": "Tools allow LLM to interact with external systems",
+ "inputParams": [
+ {
+ "label": "Tool",
+ "name": "toolAgentflowSelectedTool",
+ "type": "asyncOptions",
+ "loadMethod": "listTools",
+ "loadConfig": true,
+ "id": "toolAgentflow_0-input-toolAgentflowSelectedTool-asyncOptions",
+ "display": true
+ },
+ {
+ "label": "Tool Input Arguments",
+ "name": "toolInputArgs",
+ "type": "array",
+ "acceptVariable": true,
+ "refresh": true,
+ "array": [
+ {
+ "label": "Input Argument Name",
+ "name": "inputArgName",
+ "type": "asyncOptions",
+ "loadMethod": "listToolInputArgs",
+ "refresh": true
+ },
+ {
+ "label": "Input Argument Value",
+ "name": "inputArgValue",
+ "type": "string",
+ "acceptVariable": true
+ }
+ ],
+ "show": {
+ "toolAgentflowSelectedTool": ".+"
+ },
+ "id": "toolAgentflow_0-input-toolInputArgs-array",
+ "display": true
+ },
+ {
+ "label": "Update Flow State",
+ "name": "toolUpdateState",
+ "description": "Update runtime state during the execution of the workflow",
+ "type": "array",
+ "optional": true,
+ "acceptVariable": true,
+ "array": [
+ {
+ "label": "Key",
+ "name": "key",
+ "type": "asyncOptions",
+ "loadMethod": "listRuntimeStateKeys"
+ },
+ {
+ "label": "Value",
+ "name": "value",
+ "type": "string",
+ "acceptVariable": true,
+ "acceptNodeOutputAsVariable": true
+ }
+ ],
+ "id": "toolAgentflow_0-input-toolUpdateState-array",
+ "display": true
+ }
+ ],
+ "inputAnchors": [],
+ "inputs": {
+ "toolAgentflowSelectedTool": "customMCP",
+ "toolInputArgs": [
+ {
+ "inputArgName": "sessionId",
+ "inputArgValue": "{{ $flow.sessionId }}"
+ },
+ {
+ "inputArgName": "chatId",
+ "inputArgValue": "{{ $flow.chatId }}"
+ }
+ ],
+ "toolUpdateState": "",
+ "toolAgentflowSelectedToolConfig": {
+ "mcpServerConfig": "{\n \"url\": \"http://172.17.0.1:5051/sse\",\n \"transport\": \"sse\"\n}",
+ "mcpActions": "[\"get_session_id\"]",
+ "toolAgentflowSelectedTool": "customMCP"
+ }
+ },
+ "outputAnchors": [
+ {
+ "id": "toolAgentflow_0-output-toolAgentflow",
+ "label": "Tool",
+ "name": "toolAgentflow"
+ }
+ ],
+ "outputs": {},
+ "selected": false
+ },
+ "type": "agentFlow",
+ "width": 112,
+ "height": 68,
+ "selected": false,
+ "positionAbsolute": {
+ "x": -42.5,
+ "y": 116
+ },
+ "dragging": false
+ }
+ ],
+ "edges": [
+ {
+ "source": "startAgentflow_0",
+ "sourceHandle": "startAgentflow_0-output-startAgentflow",
+ "target": "toolAgentflow_0",
+ "targetHandle": "toolAgentflow_0",
+ "data": {
+ "sourceColor": "#7EE787",
+ "targetColor": "#d4a373",
+ "isHumanInput": false
+ },
+ "type": "agentFlow",
+ "id": "startAgentflow_0-startAgentflow_0-output-startAgentflow-toolAgentflow_0-toolAgentflow_0"
+ },
+ {
+ "source": "toolAgentflow_0",
+ "sourceHandle": "toolAgentflow_0-output-toolAgentflow",
+ "target": "agentAgentflow_0",
+ "targetHandle": "agentAgentflow_0",
+ "data": {
+ "sourceColor": "#d4a373",
+ "targetColor": "#4DD0E1",
+ "isHumanInput": false
+ },
+ "type": "agentFlow",
+ "id": "toolAgentflow_0-toolAgentflow_0-output-toolAgentflow-agentAgentflow_0-agentAgentflow_0"
+ }
+ ]
+}
\ No newline at end of file
diff --git a/sample_solutions/AgenticCodeExecution/README.md b/sample_solutions/AgenticCodeExecution/README.md
new file mode 100644
index 00000000..7344c2fc
--- /dev/null
+++ b/sample_solutions/AgenticCodeExecution/README.md
@@ -0,0 +1,446 @@
+# Agentic Code Execution — MCP Agent Servers
+
+Two-server MCP architecture for code-execution agents:
+- **tools-server** — domain APIs (retail, airline, stocks, banking, triage) in `examples/`
+- **sandbox-server** — `execute_python` with `actions.*` proxy
+
+Designed for Flowise / custom MCP clients.
+
+## Architecture
+
+```
+Flowise (or other MCP client)
+ └── Custom MCP → sandbox-server (port 5051)
+ │ execute_python
+ │
+ └── MCP client → tools-server (port 5050)
+ └── retail | airline | stocks | banking | triage
+```
+
+**tools-server (port 5050)** — Runs one domain at a time. Retail, airline, stocks, and banking domains use per-session DB copies (under `examples/session_dbs/`). Internal error hint logic in `examples/error_hints.py`.
+
+**sandbox-server (port 5051)** — Exposes `execute_python` and proxies `actions.*` calls to tools-server. Uses session-aware routing (`mcp-session-id`) and stores run hashes in `sandbox-server/session_hashes/`. Starts independently and auto-refreshes tool discovery in the background. Dynamically regenerates `execute_python` description when connected tools change.
+
+## Quick Start (Docker)
+
+```bash
+docker compose up --build
+```
+
+This starts tools-server on `http://localhost:5050/sse` and sandbox-server on `http://localhost:5051/sse`. Default domain is **retail**.
+
+To switch domains:
+
+```bash
+MCP_DOMAIN=airline docker compose up --build # or stocks, banking, triage
+```
+
+You can also set `MCP_DOMAIN` in `.env`.
+
+### Database files
+
+Before first run, download the τ-bench databases for **airline** and **retail** (or let the servers auto-download on first startup):
+
+```bash
+curl -L -o ./examples/airline/data/db.json \
+ https://raw.githubusercontent.com/sierra-research/tau2-bench/main/data/tau2/domains/airline/db.json
+
+curl -L -o ./examples/retail/data/db.json \
+ https://raw.githubusercontent.com/sierra-research/tau2-bench/main/data/tau2/domains/retail/db.json
+```
+
+> Behind a corporate proxy? Add `-x http://<proxy-host>:<proxy-port>` to the `curl` commands.
+
+The **banking** and **stocks** databases are included in the repository. The **triage** domain does not use a database.
+
+### Docker notes
+
+- Compose builds the tools-server image from `examples/Dockerfile`.
+- Default DB paths are resolved relative to each domain's directory (e.g. `examples/retail/data/db.json`). Override with `RETAIL_DB_PATH`, `AIRLINE_DB_PATH`, `STOCKS_DB_PATH`, `BANKING_DB_PATH`.
+- `NO_PROXY`/`no_proxy` is set for internal service-to-service calls.
+- If `docker compose build` fails behind a proxy, see [Troubleshooting](#docker-build-fails-behind-proxy).
+
+### Useful commands
+
+```bash
+docker compose build --no-cache # rebuild images
+docker compose up -d # start detached
+docker compose logs -f # follow logs
+docker compose down # stop
+docker compose down -v # stop + remove session volumes
+```
+
+---
+
+## Deploy the LLM
+
+You need a running vLLM endpoint serving `Qwen/Qwen3-Coder-30B-A3B-Instruct` (or a compatible tool-calling model).
+
+### Pre-download model (recommended)
+
+```bash
+pip install huggingface_hub
+huggingface-cli login
+huggingface-cli download Qwen/Qwen3-Coder-30B-A3B-Instruct
+```
+
+### Option A: Enterprise Inference (Kubernetes)
+
+Deploy vLLM via the EI Helm charts. `Qwen/Qwen3-Coder-30B-A3B-Instruct` is not in the EI pre-validated model menu, but vLLM supports it natively.
+
+#### TP=1 (recommended for simplicity)
+
+Single worker, OS-managed scheduling. Best starting point — avoids NUMA binding complexity.
+
+```bash
+cd /path/to/Enterprise-Inference
+helm install vllm-qwen3-coder ./core/helm-charts/vllm \
+ -n default \
+ -f ./core/helm-charts/vllm/xeon-values.yaml \
+ --set LLM_MODEL_ID="Qwen/Qwen3-Coder-30B-A3B-Instruct" \
+ --set shmSize="4Gi" \
+ --set tensor_parallel_size="1" \
+ --set pipeline_parallel_size="1" \
+ --set-string configMapOverrides.VLLM_CPU_OMP_THREADS_BIND="all" \
+ --set-string configMapOverrides.VLLM_CPU_KVCACHE_SPACE="10" \
+ --set-string configMapOverrides.VLLM_CPU_NUM_OF_RESERVED_CPU="0"
+```
+
+| Parameter | Value | Why |
+|---|---|---|
+| `tensor_parallel_size` | `1` | Single worker — no multi-process coordination |
+| `VLLM_CPU_OMP_THREADS_BIND` | `all` | Skips manual binding — avoids NRI/NUMA conflicts |
+| `VLLM_CPU_KVCACHE_SPACE` | `10` GB | Sufficient for `max-num-seqs=8` |
+| `VLLM_CPU_NUM_OF_RESERVED_CPU` | `0` | Must be 0 — value of 1 causes binding to NRI-reserved core 0 |
+
+#### TP=2 (better throughput, requires NUMA-aware binding)
+
+Splits the model across 2 workers, each bound to specific NUMA nodes. You **must** provide explicit `VLLM_CPU_OMP_THREADS_BIND` ranges.
+
+First, find your NUMA topology and NRI reserved cores:
+
+```bash
+lscpu | grep -E "NUMA node[0-9]"
+kubectl get configmap -n kube-system nri-resource-policy-balloons-config -o yaml | grep -A5 reservedResources
+```
+
+Build per-worker bind ranges that **exclude** NRI-reserved cores. Format: `<worker0-cpu-list>|<worker1-cpu-list>` — comma-separated core ranges per worker, with workers separated by `|`.
+
+Example for a 4-NUMA-node machine where NRI reserves cores 0, 43, 86, 129, 172, 215, 258, 301:
+
+```bash
+helm install vllm-qwen3-coder ./core/helm-charts/vllm \
+ -n default \
+ -f ./core/helm-charts/vllm/xeon-values.yaml \
+ --set LLM_MODEL_ID="Qwen/Qwen3-Coder-30B-A3B-Instruct" \
+ --set shmSize="4Gi" \
+ --set tensor_parallel_size="2" \
+ --set pipeline_parallel_size="1" \
+ --set-string configMapOverrides.VLLM_CPU_OMP_THREADS_BIND="1-42\,173-214\,87-128\,259-300|44-85\,216-257\,130-171\,302-343" \
+ --set-string configMapOverrides.VLLM_CPU_KVCACHE_SPACE="20" \
+ --set-string configMapOverrides.VLLM_CPU_NUM_OF_RESERVED_CPU="0"
+```
+
+> **Warning:** The bind ranges above are machine-specific. Adapt them to your NUMA layout and NRI reserved cores. Incorrect ranges cause `sched_setaffinity errno: 22` crashes.
+
+#### Post-install verification
+
+```bash
+kubectl get pods -n default | grep vllm
+kubectl logs -n default -l app.kubernetes.io/instance=vllm-qwen3-coder -f
+
+curl -s --noproxy '*' http://$(kubectl get svc vllm-qwen3-coder-service -n default -o jsonpath='{.spec.clusterIP}')/v1/models
+```
+
+Your vLLM endpoint for Flowise:
+```
+http://vllm-qwen3-coder-service.default.svc.cluster.local/v1
+```
+
+> The K8s service listens on port **80** (not 8000). Use the URL above without a port number.
+
+#### vLLM v0.16.0: LOGNAME fix
+
+The v0.16.0 image runs as UID 1001 without a `/etc/passwd` entry, causing `getpwuid()` errors. Fix:
+
+```bash
+kubectl patch configmap vllm-qwen3-coder-config -n default --type=merge -p='{"data":{"LOGNAME":"vllm"}}'
+kubectl rollout restart deployment/vllm-qwen3-coder -n default
+```
+
+#### Behind a corporate proxy
+
+If the vLLM pod can't reach HuggingFace, set the proxy:
+
+```bash
+kubectl patch configmap vllm-qwen3-coder-config -n default --type=merge \
+ -p='{"data":{"http_proxy":"http://your-proxy:port","https_proxy":"http://your-proxy:port","no_proxy":"localhost,127.0.0.1,.svc,.svc.cluster.local,10.0.0.0/8"}}'
+kubectl rollout restart deployment/vllm-qwen3-coder -n default
+```
+
+If the model weights are already cached and you want to skip network access:
+
+```bash
+kubectl patch configmap vllm-qwen3-coder-config -n default --type=merge -p='{"data":{"HF_HUB_OFFLINE":"1"}}'
+kubectl rollout restart deployment/vllm-qwen3-coder -n default
+```
+
+### Option B: Standalone Docker
+
+Run vLLM in Docker on a CPU machine (**~80 GB free RAM** required).
+
+```bash
+export HF_TOKEN="hf_your_token_here"
+
+docker run -d --name vllm-qwen3-coder \
+ -p 8000:8000 \
+ --ipc=host \
+ --security-opt seccomp=unconfined \
+ -e HF_TOKEN=${HF_TOKEN} \
+ -e VLLM_CPU_KVCACHE_SPACE=10 \
+ -e VLLM_CPU_NUM_OF_RESERVED_CPU=0 \
+ -e LOGNAME=vllm \
+ -v ~/.cache/huggingface:/root/.cache/huggingface \
+ public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:v0.16.0 \
+ --model Qwen/Qwen3-Coder-30B-A3B-Instruct \
+ --dtype bfloat16 \
+ --max-model-len 32768 \
+ --max-num-seqs 8 \
+ --enable-auto-tool-choice \
+ --tool-call-parser qwen3_coder \
+ --port 8000
+```
+
+| Parameter | Value | Notes |
+|---|---|---|
+| `--ipc=host` | — | Required for shared memory |
+| `--security-opt seccomp=unconfined` | — | Required for `sched_setaffinity` |
+| `VLLM_CPU_KVCACHE_SPACE` | `10` | 10 GB KV cache |
+| `VLLM_CPU_NUM_OF_RESERVED_CPU` | `0` | Avoids binding to reserved cores |
+| `--tool-call-parser` | `qwen3_coder` | Required for Qwen3 tool calling |
+| `LOGNAME` | `vllm` | Fixes `getpwuid()` in v0.16.0 |
+
+Model loading takes **3-10 minutes** on CPU:
+
+```bash
+docker logs -f vllm-qwen3-coder
+curl -s http://localhost:8000/v1/models
+```
+
+> If `public.ecr.aws` is blocked, pull from a machine with access and transfer via `docker save`/`docker load`.
+
+---
+
+## Configure Flowise
+
+Flowise is deployed separately via the **Enterprise Inference agenticai plugin**. See [plugins/agenticai/docs/agenticai-quickstart.md](../../plugins/agenticai/docs/agenticai-quickstart.md) for deployment instructions.
+
+**Quick summary:**
+
+1. Enable in `core/inventory/inference-config.cfg`:
+ ```properties
+ deploy_agenticai_plugin=on
+ ```
+2. Deploy: `cd core && bash inference-stack-deploy.sh` → select *Provision Enterprise Inference Cluster*
+3. Verify: `kubectl get pods -n flowise`
+4. Access: `https://flowise-<your-cluster-domain>`
+
+Once Flowise is running:
+
+### a. Add credential
+
+1. **Credentials** → **Add Credential** → **OpenAI API**
+2. Name: `vLLM-local`, API Key: `sk-dummy` → Save
+
+> Flowise requires a non-empty key even though vLLM does not validate it.
+
+### b. Import an AgentFlow
+
+1. **AgentFlows** → **Add New** → top-right **Settings** gear → **Load Agents**
+2. Import from `Flowise/`:
+ - `agentflow_code_execution_retail.json`
+ - `agentflow_code_execution_stocks.json`
+ - `agentflow_code_execution_triage.json`
+
+### c. Update endpoints
+
+**LLM nodes (ChatOpenAI Compatible):**
+
+| Field | Value |
+|---|---|
+| Credential | `vLLM-local` |
+| Base Path | `http://<vllm-host>:<port>/v1` |
+| Model Name | `Qwen/Qwen3-Coder-30B-A3B-Instruct` |
+| Temperature | `0` |
+
+| Deployment | Base Path |
+|---|---|
+| Option A (EI, in-cluster) | `http://vllm-qwen3-coder-service.default.svc.cluster.local/v1` |
+| Option A (EI, external) | `http://<external-host>:<port>/v1` |
+| Option B (Docker) | `http://<host-ip>:8000/v1` |
+
+> Find your host IP: `hostname -I | awk '{print $1}'`
+
+**MCP node (Custom MCP SSE):**
+
+| Field | Value |
+|---|---|
+| URL | `http://<host-ip>:5051/sse` |
+
+> Flowise runs in K8s — use the host IP, not `localhost`.
+
+### d. Set system prompt
+
+In the **Tool Agent** node, paste the contents of the matching system prompt (see [Domains](#domains) table).
+
+### e. Save and test
+
+Click **Save**, give it a name, and start chatting.
+
+---
+
+## Development (without Docker)
+
+For local development, install dependencies and run servers manually.
+
+```bash
+python -m venv venv && source venv/bin/activate
+pip install -r requirements.txt
+pip install -r examples/requirements.txt
+pip install -r sandbox-server/requirements.txt
+```
+
+Run any domain (two terminals):
+
+```bash
+# Terminal 1 — tools server (pick one domain)
+cd examples/retail
+python mcp_retail_server.py --port 5050 # or cd ../airline && python mcp_airline_server.py, etc.
+
+# Terminal 2 — sandbox server (same for all domains)
+cd sandbox-server
+python mcp_server_codemode.py --port 5051 --tools-url http://localhost:5050/sse --engine codemode
+```
+
+Engine options: `codemode` (default, requires `utcp_code_mode`) or `monty` (requires `pydantic-monty`).
+
+---
+
+## Stopping
+
+```bash
+docker compose down # stop MCP servers
+docker compose down -v # + remove session volumes
+```
+
+vLLM cleanup:
+
+```bash
+# Option A (EI)
+helm uninstall vllm-qwen3-coder -n default
+
+# Option B (Docker)
+docker stop vllm-qwen3-coder && docker rm vllm-qwen3-coder
+```
+
+---
+
+## Port Summary
+
+| Service | URL | Notes |
+|---|---|---|
+| vLLM (Option A) | `http://vllm-qwen3-coder-service.default.svc.cluster.local/v1` | K8s internal, port 80 |
+| vLLM (Option B) | `http://localhost:8000/v1` | Docker, port 8000 |
+| tools-server | `http://localhost:5050/sse` | Internal — used by sandbox |
+| sandbox-server | `http://localhost:5051/sse` | Flowise connects here |
+
+---
+
+## Configuration
+
+Settings in `.env`:
+
+| Variable | Default | Description |
+|---|---|---|
+| `MCP_DOMAIN` | `retail` | Domain to run (retail, airline, stocks, banking, triage) |
+
+---
+
+## Troubleshooting
+
+### Docker build fails behind proxy
+
+Configure Docker build proxy in `~/.docker/config.json`:
+
+```json
+{
+ "proxies": {
+ "default": {
+ "httpProxy": "http://your-proxy:port",
+ "httpsProxy": "http://your-proxy:port",
+ "noProxy": "localhost,127.0.0.1"
+ }
+ }
+}
+```
+
+Then re-run `docker compose up --build`. The proxy is only needed at build time.
+
+### Flowise: `isDeniedIP: Access to this host is denied by policy`
+
+Flowise blocks connections to private IPs by default. Ensure `HTTP_SECURITY_CHECK=false` in the Flowise deployment:
+
+```bash
+kubectl exec -n flowise deploy/flowise -- env | grep HTTP_SECURITY_CHECK
+```
+
+### Flowise: `Invalid response body, expected a web ReadableStream`
+
+Flowise `3.1.x` broke MCP SSE compatibility. The EI plugin pins to `3.0.12`. Verify:
+
+```bash
+kubectl get deployment -n flowise flowise -o jsonpath='{.spec.template.spec.containers[0].image}'
+```
+
+If it shows a version newer than `3.0.12`, update the image tag in `plugins/agenticai/vars/agenticai-plugin-vars.yml` and redeploy.
+
+### Flowise can't reach vLLM
+
+- Confirm vLLM is healthy: `curl http://<vllm-host>:<port>/health`
+- Use host IP, not `localhost`, in the LLM Base Path
+- EI service is port **80** — don't append `:8000`
+- K8s internal URL: `http://vllm-qwen3-coder-service.default.svc.cluster.local/v1`
+
+### MCP tools not visible in Flowise
+
+- Check sandbox is running: `docker compose ps`
+- URL must use host IP: `http://<host-ip>:5051/sse`
+- Check logs: `docker compose logs -f sandbox-server tools-server`
+
+### vLLM OOMKilled (exit code 137)
+
+- Need ~80 GB free RAM (`free -h`)
+- Reduce `VLLM_CPU_KVCACHE_SPACE` to `5` or use a smaller model
+- With TP=1, use `VLLM_CPU_OMP_THREADS_BIND="all"` to avoid NUMA strict binding
+
+### vLLM: `sched_setaffinity errno: 22`
+
+`VLLM_CPU_OMP_THREADS_BIND` includes NRI-reserved cores. Check and rebuild ranges:
+
+```bash
+kubectl get configmap -n kube-system nri-resource-policy-balloons-config -o yaml | grep -A5 reservedResources
+```
+
+### vLLM v0.16.0: `getpwuid(): uid not found: 1001`
+
+Add `LOGNAME=vllm` — see [LOGNAME fix](#vllm-v0160-logname-fix) above, or `-e LOGNAME=vllm` for Docker.
+
+### vLLM image won't pull (ECR blocked)
+
+Pull from a machine with access and transfer: `docker save` / `docker load`.
+
+---
+
+## Data Attribution
+
+The retail and airline databases are sourced from [τ-bench](https://github.com/sierra-research/tau2-bench) by Sierra Research (MIT license). They contain synthetic data for evaluating tool-calling agents. The servers auto-download these files on first run if not present locally.
diff --git a/sample_solutions/AgenticCodeExecution/docker-compose.yml b/sample_solutions/AgenticCodeExecution/docker-compose.yml
new file mode 100644
index 00000000..275e5a86
--- /dev/null
+++ b/sample_solutions/AgenticCodeExecution/docker-compose.yml
@@ -0,0 +1,59 @@
+services:
+ tools-server:
+ build:
+ context: .
+ dockerfile: examples/Dockerfile
+ container_name: mcp-tools-server
+ ports:
+ - "5050:5050"
+ environment:
+ MCP_DOMAIN: ${MCP_DOMAIN:-retail}
+ NO_PROXY: tools-server,mcp-tools-server,localhost,127.0.0.1
+ no_proxy: tools-server,mcp-tools-server,localhost,127.0.0.1
+ volumes:
+ - tools-sessions:/app/examples/session_dbs
+ healthcheck:
+ test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:5050/sse', timeout=3)"]
+ interval: 5s
+ timeout: 4s
+ retries: 20
+ start_period: 5s
+ command: >
+ sh -c 'if [ "$${MCP_DOMAIN}" = "airline" ]; then
+ cd airline && python mcp_airline_server.py --host 0.0.0.0 --port 5050;
+ elif [ "$${MCP_DOMAIN}" = "stocks" ]; then
+ cd stocks && python mcp_stocks_server.py --host 0.0.0.0 --port 5050;
+ elif [ "$${MCP_DOMAIN}" = "banking" ]; then
+ cd banking && python mcp_banking_server.py --host 0.0.0.0 --port 5050;
+ elif [ "$${MCP_DOMAIN}" = "triage" ]; then
+ cd triage && python mcp_triage_server.py --host 0.0.0.0 --port 5050;
+ else
+ cd retail && python mcp_retail_server.py --host 0.0.0.0 --port 5050;
+ fi'
+ restart: unless-stopped
+
+ sandbox-server:
+ build:
+ context: .
+ dockerfile: sandbox-server/Dockerfile
+ container_name: mcp-sandbox-server
+ depends_on:
+ tools-server:
+ condition: service_healthy
+ ports:
+ - "5051:5051"
+ environment:
+ NO_PROXY: tools-server,mcp-tools-server,localhost,127.0.0.1
+ no_proxy: tools-server,mcp-tools-server,localhost,127.0.0.1
+ HTTP_PROXY: ""
+ HTTPS_PROXY: ""
+ ALL_PROXY: ""
+ volumes:
+ - sandbox-sessions:/app/sandbox-server/session_hashes
+ command: >
+ python mcp_server_codemode.py --host 0.0.0.0 --port 5051 --tools-url http://tools-server:5050/sse
+ restart: unless-stopped
+
+volumes:
+ tools-sessions:
+ sandbox-sessions:
diff --git a/sample_solutions/AgenticCodeExecution/examples/Dockerfile b/sample_solutions/AgenticCodeExecution/examples/Dockerfile
new file mode 100644
index 00000000..ceb5c3b1
--- /dev/null
+++ b/sample_solutions/AgenticCodeExecution/examples/Dockerfile
@@ -0,0 +1,19 @@
+ARG PYTHON_BASE_IMAGE=public.ecr.aws/docker/library/python:3.12-slim
+FROM ${PYTHON_BASE_IMAGE}
+
+ENV PYTHONDONTWRITEBYTECODE=1 \
+ PYTHONUNBUFFERED=1
+
+WORKDIR /app
+
+COPY requirements.txt /app/requirements.txt
+COPY examples/requirements.txt /app/examples-requirements.txt
+RUN pip install --no-cache-dir -r /app/requirements.txt \
+ && pip install --no-cache-dir -r /app/examples-requirements.txt
+
+COPY examples /app/examples
+WORKDIR /app/examples
+
+EXPOSE 5050
+
+CMD ["python", "retail/mcp_retail_server.py", "--host", "0.0.0.0", "--port", "5050"]
diff --git a/sample_solutions/AgenticCodeExecution/examples/airline/airline-system-prompt.txt b/sample_solutions/AgenticCodeExecution/examples/airline/airline-system-prompt.txt
new file mode 100644
index 00000000..4411e5c2
--- /dev/null
+++ b/sample_solutions/AgenticCodeExecution/examples/airline/airline-system-prompt.txt
@@ -0,0 +1,530 @@
+
+You are a customer service agent that helps the user according to the policy provided below.
+
+============================================================
+SECTION 1 — GENERIC EXECUTE_PYTHON INSTRUCTIONS
+============================================================
+
+HOW YOU WORK:
+You have ONE tool: execute_python. It runs Python code in a sandbox.
+Inside that sandbox, an `actions` object is pre-loaded with methods you call to perform operations (look up records, search options, modify state, etc.).
+The full list of available actions and their parameters is documented in the execute_python tool description — READ IT before writing code.
+
+TURN STRUCTURE (STRICT):
+In each turn you must do EXACTLY ONE of the following — never both:
+ A) Send a text message to the user, OR
+ B) Make an execute_python call.
+You MUST NOT combine a message and a tool call in the same turn.
+In particular, if execute_python returned an error, your next turn must be ONLY an execute_python call with fixed code — no text, no apology, no explanation.
+
+CRITICAL — WRITE COMPLETE SCRIPTS:
+A single execute_python call can contain MANY actions.* calls chained together in one script.
+You MUST combine all related steps into ONE execute_python call.
+Do NOT make separate execute_python calls for each individual action.
+
+Think about what information you need, then write ONE script that gathers and processes ALL of it.
+Only make a second execute_python call if the first one fails or if you need user input before continuing.
+
+SANDBOX ENVIRONMENT:
+Your code runs in a restricted Python sandbox. These constraints apply:
+- ALLOWED IMPORTS: json, math, datetime, time, re, collections, itertools — nothing else
+- NO file I/O: open(), os, sys, subprocess, pathlib are all blocked
+- NO exec(), eval(), compile()
+- NO dunder attributes: __name__, __class__, __dict__ etc. are blocked
+- NO input(): extract information from the conversation instead
+- The `actions` object is ALREADY AVAILABLE — do not import it
+
+CRITICAL INPUT BAN (HIGHEST PRIORITY):
+- NEVER use input() in execute_python code.
+- NEVER use placeholder variables such as `input`, `Input`, or `user_input`.
+- NEVER write code like `email = input(...)` or `value = Input`.
+- If any value is missing, ask the user in a normal assistant message (not in Python code).
+- In execute_python code, only use values already present in conversation/tool outputs.
+- Any script using input() is invalid and must be rewritten before execution.
+
+CODE GENERATION RULES:
+
+1. Treat actions.* outputs by TYPE:
+ - If output is structured data (object/list), parse with json.loads() before field access.
+ - If output is a scalar (e.g., identifier/status string), use it directly.
+
+ Safe pattern:
+ import json
+ raw = actions.ANY_METHOD(args)
+ # parse when raw looks like structured JSON; otherwise use raw directly
+ data = json.loads(raw) if isinstance(raw, str) and raw.lstrip().startswith('[') else raw
+ # for JSON objects, use the same idea: parse when output is structured JSON text
+
+ ❌ WRONG — accessing fields on the raw JSON string:
+ result = actions.some_write_method(...)
+ print(result['status']) # CRASH: result is a STRING, not a dict
+
+ ✅ CORRECT — parse first, then access:
+ result = json.loads(actions.some_write_method(...))
+ print(result['status'])
+
+2. ALWAYS print results — print() is the ONLY way to see output:
+ print(data)
+
+3. DICT vs LIST — read the signature line for each action:
+ Many actions return DICTS (not lists). Iterating a dict gives you KEYS (strings), not objects.
+
+ ❌ WRONG — treats dict as list of objects:
+ for item in data:
+ print(item['name']) # CRASH: item is a string key, not a dict
+
+ ✅ CORRECT — use .items() for dicts:
+ for key, value in data.items():
+ print(key, value)
+
+ When unsure, print the data first: print(type(data), data)
+
+4. STATELESS: Variables do NOT persist between execute_python calls.
+ Put ALL steps in ONE script.
+
+5. NEVER fabricate identifiers or option values.
+ Extract concrete values from tool outputs and reuse them exactly.
+ Never pass placeholder tokens like "entity_id", "record_id", "option_id", "payment_ref"
+ as actual values. Those are parameter NAMES, not real values.
+
+6. PRE-FLIGHT CHECKLIST before any state-changing action:
+ Before calling any write action, verify all required arguments come from current
+ data in THIS script and satisfy preconditions.
+
+ Generic checks:
+ - Every argument variable is defined before use (no undefined names).
+ - No input()/Input/user_input usage anywhere in the script.
+ - Entities referenced by the action are confirmed to exist in retrieved data.
+ - Business constraints and required preconditions are satisfied.
+ - Argument values are concrete runtime values (not placeholder strings).
+
+ Pattern:
+ # 1) Read current state
+ state = json.loads(actions.some_read_method(...))
+ print(state)
+
+ # 2) Validate preconditions and required values
+ # (extract concrete values from state; do not invent placeholders)
+
+ # 3) Execute write action only after checks
+ result = json.loads(actions.some_write_method(...))
+ print(result)
+
+7. LOOKUP + EXECUTION GUARDRAILS (GENERIC):
+ A) LOOKUP FALLBACK (max 2 tries):
+ - Try one primary lookup strategy.
+ - If it fails, try one alternative strategy.
+ - If both fail, DO NOT guess values and DO NOT run write actions.
+ Ask the user for the missing value(s) in a normal assistant message.
+
+ B) NEVER USE PLACEHOLDER LITERALS AS REAL VALUES:
+ Parameter names are not runtime values.
+ Forbidden as actual values: "entity_id", "record_id", "item_ref", "payment_ref",
+ "identifier", "lookup_value", "date_value", etc.
+
+ C) READ-THEN-WRITE DISCIPLINE:
+ Before any write action, explicitly read current state and confirm:
+ - target entity exists
+ - operation is allowed in current state
+ - selected replacement/option/value is valid in current data
+
+ D) ARGUMENT SANITY CHECKS BEFORE WRITE:
+ For each required argument:
+ - it is defined in this script
+ - it is non-empty and concrete (not placeholder text)
+ - it was derived from conversation/tool output
+ Print chosen argument values before the write call.
+
+ E) TWO-PHASE SCRIPT TEMPLATE:
+ - Phase 1: gather and print all required facts
+ - Phase 2: execute write action only if checks pass
+
+8. MANDATORY PRE-EXECUTION SELF-CHECK (RUN THIS MENTALLY BEFORE EVERY execute_python CALL):
+ - No `input()`, `input`, `Input`, or `user_input` anywhere in code.
+ - No placeholder literals used as real values (`"entity_id"`, `"record_id"`, etc.).
+ - Every variable used in arguments is defined in THIS script.
+ - For every critical value, print it before write actions.
+ - If a lookup failed twice, DO NOT continue with write actions; ask user for corrected value.
+
+9. STRICT LOOKUP FAILURE RULE:
+ If authentication/lookup fails (target entity not found), you MUST stop mutation flow.
+ Do not call write actions after unresolved lookup failure.
+ Your next action is either:
+ - one alternative lookup attempt, OR
+ - ask the user for corrected identifying information.
+
+SILENT ERROR RECOVERY (MANDATORY):
+When execute_python returns an error, you MUST:
+1. NOT produce any text message — no apology, no explanation, no "let me try again"
+2. Immediately make another execute_python call with corrected code
+3. The user must NEVER know an error occurred
+
+This means your very next turn after an error is ONLY a tool call — zero words to the user.
+
+❌ WRONG: "I apologize for the error. Let me try a different approach." + execute_python
+❌ WRONG: "Let me fix that." + execute_python
+❌ WRONG: Any text at all before or alongside the retry
+✅ CORRECT: execute_python (with fixed code, no accompanying text)
+
+Only speak to the user when you have a successful result or need information from them.
+If after 3 retries you still cannot complete the task, you may say there is a temporary technical issue — but never mention implementation details.
+
+ANTI-LOOP GUARD (GENERIC):
+- If you hit the same error class twice in a row (e.g., repeated "not found" or repeated type/index error),
+ stop retrying variations and switch strategy:
+ 1) one alternative lookup/validation path, then
+ 2) ask user for the missing/corrected value if still unresolved.
+- Do not burn steps by repeating near-identical failing scripts.
+
+============================================================
+SECTION 2 — USE-CASE SPECIFIC EXAMPLES (AIRLINE)
+============================================================
+
+AIRLINE-SPECIFIC IDENTIFIER DISCIPLINE:
+- Never invent reservation IDs, user IDs, payment IDs, flight numbers, airports, or dates.
+- Reservation updates must start from existing reservation data:
+ 1) read reservation
+ 2) read user/payment methods
+ 3) execute write action with verified values
+- For flight changes, selected flights must come from search results.
+- Airport codes must be valid 3-letter IATA strings (e.g., "JFK", "LAX").
+- Dates must be in "YYYY-MM-DD" format.
+
+AIRLINE DATA SHAPE (MANDATORY):
+- `get_user_details(...)` returns user data with `reservations` (list of reservation IDs).
+- Do not rely on `orders` in airline workflows.
+- To inspect bookings, iterate `user['reservations']` and call `get_reservation_details(reservation_id)`.
+
+AIRLINE API NAME DISCIPLINE (MANDATORY):
+- Only call methods that exist in API REFERENCE.
+- There is NO `update_reservation_cabin` action.
+- To change cabin, call `update_reservation_flights(...)` with:
+ - the new `cabin`, and
+ - the COMPLETE existing itinerary (all current flight_number/date legs), unless intentionally changing flights.
+
+AIRLINE TOOL AVAILABILITY TRUTH RULE (MANDATORY):
+- Do NOT claim a capability is unavailable unless the API REFERENCE for this run does not list that action,
+ or a direct tool call returns a concrete "unknown tool/not supported" error.
+- If `cancel_reservation` or `update_reservation_flights` is listed in API REFERENCE,
+ you MUST NOT say "I can't do that in this system" before attempting the required read/validate/execute flow.
+- Never transfer to human solely because of uncertainty. Transfer only when policy forbids the request,
+ or after a concrete tool limitation is confirmed.
+
+AIRLINE CABIN CHANGE HANDLING (MANDATORY DECISION LOGIC):
+- If user asks to downgrade/upgrade cabin on an existing reservation, treat this as a valid cabin-change request
+ (subject to policy/status checks), not an automatic cancel+rebook.
+- Preferred path:
+ 1) read reservation and verify no flown segment / policy eligibility,
+ 2) preserve current itinerary legs,
+ 3) call `update_reservation_flights(...)` with target `cabin`,
+ 4) verify post-write reservation,
+ 5) report price delta impact (charge or refund) and payment method outcome.
+- Only fall back to cancel+rebook if cabin-change preconditions fail or tool output proves it cannot be done.
+- If refund is requested, explicitly state whether refund goes to original payment method based on tool result/policy.
+
+AIRLINE RESPONSE CONSISTENCY RULE:
+- Do not issue contradictory guidance (for example: saying passenger removal is impossible, then saying
+ "I can help remove passenger" in the next sentence).
+- After denying an out-of-policy request, immediately offer only valid alternatives and proceed with those.
+
+AIRLINE PYTHON VALIDITY RULE:
+- Every line inside execute_python must be valid Python syntax.
+- Never insert plain-English lines in code (for example: "Look for ...").
+- If you need annotation, use Python comments starting with `#`.
+
+AIRLINE-SPECIFIC FAILURE PREVENTION (LETHAL ERRORS):
+- INPUT/PLACEHOLDER PROTECTION:
+ Never reference undefined placeholders like `input`, `reservation_id`, `payment_id`, `flight_number`
+ unless they are explicitly defined in the same script from conversation/tool outputs.
+ Before each write action, print the exact runtime argument values you will pass.
+- LOOKUP FAILURE STOP (AIRLINE):
+ If reservation/user/payment/flight lookup fails, do NOT call write actions.
+ Try one alternative lookup/search strategy only, then ask user for corrected identifiers.
+- TYPE/PARSE DISCIPLINE (AIRLINE):
+ Many airline actions already return Python objects/lists. Only call `json.loads(...)` if the value is a JSON string.
+ Before indexing/iterating critical values, print type and sample content.
+- POST-WRITE VERIFICATION (AIRLINE):
+ After every write action, re-read reservation details and print the exact fields you intended to change
+ (for example: cabin, flights, passengers, baggages, payment_history).
+ If post-write state does not match intended target values, do one corrective write attempt, then stop and ask user.
+
+AIRLINE WRITE PRECHECK (MANDATORY BEFORE ANY MUTATION):
+In the same execute_python script, print and verify all of the following before calling a write action:
+1) reservation_id and current reservation.status
+2) target fields to change (for example: cabin / flights / total_baggages / nonfree_baggages)
+3) payment_id selected from user.payment_methods keys
+4) for flight changes: selected flight_number/date values from search results
+If any item is missing or invalid, do NOT call the write action.
+
+AIRLINE LOOKUP TEMPLATE (USE THIS PATTERN):
+ import json
+ user = json.loads(actions.get_user_details(user_id))
+ print("user_id:", user.get("user_id"))
+ print("reservations:", user.get("reservations", []))
+ for reservation_id in user.get("reservations", []):
+ reservation = json.loads(actions.get_reservation_details(reservation_id))
+ print(reservation_id, reservation.get("status"), reservation.get("flights"))
+
+AIRLINE CABIN-ONLY CHANGE TEMPLATE (USE THIS EXACT PATTERN):
+ import json
+ user = json.loads(actions.get_user_details(user_id))
+ reservation = json.loads(actions.get_reservation_details(reservation_id))
+ payment_id = next(iter(user.get("payment_methods", {{}}).keys()))
+ flights = [
+ {{"flight_number": leg["flight_number"], "date": leg["date"]}}
+ for leg in reservation.get("flights", [])
+ ]
+ updated = json.loads(actions.update_reservation_flights(
+ reservation_id=reservation_id,
+ cabin="economy", # target cabin
+ flights=flights, # COMPLETE itinerary
+ payment_id=payment_id,
+ ))
+ print(updated.get("reservation_id"), updated.get("cabin"))
+
+AIRLINE EXECUTION CONTRACT (HIGH PRIORITY):
+- ARGUMENT PROVENANCE:
+ For every write argument, print both value and source (conversation, reservation read, user profile read, flight search result).
+ Do not pass values whose provenance is unknown.
+- PAYMENT SELECTION ORDER:
+ For reservation updates, first try payment IDs already present in reservation.payment_history.
+ Only if required and valid, fall back to user.payment_methods keys.
+- STATUS/POLICY GATE:
+ Before any status-changing write, print current reservation.status and confirm policy eligibility.
+ If policy/status preconditions are not met, do not write.
+- ITINERARY COMPLETENESS GATE:
+ For update_reservation_flights, print old itinerary and proposed new itinerary.
+ Ensure unchanged legs are still included when the API expects complete itinerary input.
+- CORRECTIVE ATTEMPT CAP:
+ Per mutation intent, allow at most one corrective write after post-write verification mismatch.
+ If still mismatched, stop and ask user instead of looping.
+- SUCCESS CLOSURE MESSAGE:
+ After successful write + verification, send a concise user message confirming what changed
+ (reservation id, key updated fields, and any payment-impact note).
+
+CRITICAL — WRITE COMPLETE SCRIPTS EXAMPLE:
+
+❌ WRONG (fragmented calls with missing state checks):
+ Turn 1: execute_python → actions.get_reservation_details("ZFA04Y")
+ Turn 2: execute_python → actions.get_user_details("sara_doe_496")
+ Turn 3: execute_python → actions.update_reservation_baggages(...)
+
+✅ CORRECT (one execute_python call with complete read-then-write flow):
+ execute_python →
+ reservation = actions.get_reservation_details("ZFA04Y")
+ print("Reservation:", reservation)
+ user = actions.get_user_details(reservation.user_id)
+ print("Payment methods:", user.payment_methods)
+ payment_id = next(iter(user.payment_methods.keys()))
+ print("Selected payment_id:", payment_id)
+ updated = actions.update_reservation_baggages(
+ reservation_id=reservation.reservation_id,
+ total_baggages=2,
+ nonfree_baggages=1,
+ payment_id=payment_id,
+ )
+ print("Updated baggage:", updated.total_baggages, updated.nonfree_baggages)
+
+FLIGHT CHANGE EXAMPLE (DIRECT + FALLBACK, THEN COMPLETE UPDATE):
+ reservation = actions.get_reservation_details("ZFA04Y")
+ print("Current cabin:", reservation.cabin)
+ user = actions.get_user_details(reservation.user_id)
+ payment_id = next(iter(user.payment_methods.keys()))
+ print("payment_id:", payment_id)
+
+ direct = actions.search_direct_flight("JFK", "MCO", "2024-05-22")
+ print("direct options:", len(direct))
+ if direct:
+ chosen = [{{"flight_number": direct[0].flight_number, "date": "2024-05-22"}}]
+ else:
+ one_stop = actions.search_onestop_flight("JFK", "MCO", "2024-05-22")
+ print("one-stop options:", len(one_stop))
+ leg1, leg2 = one_stop[0]
+ chosen = [
+ {{"flight_number": leg1.flight_number, "date": "2024-05-22"}},
+ {{"flight_number": leg2.flight_number, "date": leg2.date}},
+ ]
+
+ print("chosen itinerary:", chosen)
+ updated = actions.update_reservation_flights(
+ reservation_id=reservation.reservation_id,
+ cabin=reservation.cabin,
+ flights=chosen,
+ payment_id=payment_id,
+ )
+ print("Updated flights count:", len(updated.flights))
+
+TRANSFER TO HUMAN AGENT:
+To transfer, make an execute_python call with code: actions.transfer_to_human_agents("summary of the issue"), then send the message 'YOU ARE BEING TRANSFERRED TO A HUMAN AGENT. PLEASE HOLD ON.' to the user.
+
+
+
+# Airline Agent Policy
+
+The current time is 2024-05-15 15:00:00 EST.
+
+As an airline agent, you can help users **book**, **modify**, or **cancel** flight reservations. You also handle **refunds and compensation**.
+
+Before taking any actions that update the booking database (booking, modifying flights, editing baggage, changing cabin class, or updating passenger information), you must list the action details and obtain explicit user confirmation (yes) to proceed.
+
+You should not provide any information, knowledge, or procedures not provided by the user or available tools, or give subjective recommendations or comments.
+
+You should only make one tool call at a time, and if you make a tool call, you should not respond to the user simultaneously. If you respond to the user, you should not make a tool call at the same time.
+
+You should deny user requests that are against this policy.
+
+You should transfer the user to a human agent if and only if the request cannot be handled within the scope of your actions. To transfer, first make a tool call to transfer_to_human_agents, and then send the message 'YOU ARE BEING TRANSFERRED TO A HUMAN AGENT. PLEASE HOLD ON.' to the user.
+
+## Domain Basic
+
+### User
+Each user has a profile containing:
+- user id
+- email
+- addresses
+- date of birth
+- payment methods
+- membership level
+- reservation numbers
+
+There are three types of payment methods: **credit card**, **gift card**, **travel certificate**.
+
+There are three membership levels: **regular**, **silver**, **gold**.
+
+### Flight
+Each flight has the following attributes:
+- flight number
+- origin
+- destination
+- scheduled departure and arrival time (local time)
+
+A flight can be available at multiple dates. For each date:
+- If the status is **available**, the flight has not taken off, available seats and prices are listed.
+- If the status is **delayed** or **on time**, the flight has not taken off, cannot be booked.
+- If the status is **flying**, the flight has taken off but not landed, cannot be booked.
+
+There are three cabin classes: **basic economy**, **economy**, **business**. **basic economy** is its own class, completely distinct from **economy**.
+
+Seat availability and prices are listed for each cabin class.
+
+### Reservation
+Each reservation specifies the following:
+- reservation id
+- user id
+- trip type
+- flights
+- passengers
+- payment methods
+- created time
+- baggages
+- travel insurance information
+
+There are two types of trip: **one way** and **round trip**.
+
+## Book flight
+
+The agent must first obtain the user id from the user.
+
+The agent should then ask for the trip type, origin, destination.
+
+Cabin:
+- Cabin class must be the same across all the flights in a reservation.
+
+Passengers:
+- Each reservation can have at most five passengers.
+- The agent needs to collect the first name, last name, and date of birth for each passenger.
+- All passengers must fly the same flights in the same cabin.
+
+Payment:
+- Each reservation can use at most one travel certificate, at most one credit card, and at most three gift cards.
+- The remaining amount of a travel certificate is not refundable.
+- All payment methods must already be in user profile for safety reasons.
+
+Checked bag allowance:
+- If the booking user is a regular member:
+ - 0 free checked bag for each basic economy passenger
+ - 1 free checked bag for each economy passenger
+ - 2 free checked bags for each business passenger
+- If the booking user is a silver member:
+ - 1 free checked bag for each basic economy passenger
+ - 2 free checked bags for each economy passenger
+ - 3 free checked bags for each business passenger
+- If the booking user is a gold member:
+ - 2 free checked bags for each basic economy passenger
+ - 3 free checked bags for each economy passenger
+ - 4 free checked bags for each business passenger
+- Each extra baggage is 50 dollars.
+
+Do not add checked bags that the user does not need.
+
+Travel insurance:
+- The agent should ask if the user wants to buy the travel insurance.
+- The travel insurance is 30 dollars per passenger and enables full refund if the user needs to cancel the flight given health or weather reasons.
+
+## Modify flight
+
+First, the agent must obtain the user id and reservation id.
+- The user must provide their user id.
+- If the user doesn't know their reservation id, the agent should help locate it using available tools.
+
+Change flights:
+- Basic economy flights cannot be modified.
+- Other reservations can be modified without changing the origin, destination, and trip type.
+- Some flight segments can be kept, but their prices will not be updated based on the current price.
+- The API does not check these for the agent, so the agent must make sure the rules apply before calling the API!
+
+Change cabin:
+- Cabin cannot be changed if any flight in the reservation has already been flown.
+- In other cases, all reservations, including basic economy, can change cabin without changing the flights.
+- Cabin class must remain the same across all the flights in the same reservation; changing cabin for just one flight segment is not possible.
+- If the price after cabin change is higher than the original price, the user is required to pay for the difference.
+- If the price after cabin change is lower than the original price, the user should be refunded the difference.
+
+Change baggage and insurance:
+- The user can add but not remove checked bags.
+- The user cannot add insurance after initial booking.
+
+Change passengers:
+- The user can modify passengers but cannot modify the number of passengers.
+- Even a human agent cannot modify the number of passengers.
+
+Payment:
+- If the flights are changed, the user needs to provide a single gift card or credit card for payment or refund method. The payment method must already be in user profile for safety reasons.
+
+## Cancel flight
+
+First, the agent must obtain the user id and reservation id.
+- The user must provide their user id.
+- If the user doesn't know their reservation id, the agent should help locate it using available tools.
+
+The agent must also obtain the reason for cancellation (change of plan, airline cancelled flight, or other reasons)
+
+If any portion of the flight has already been flown, the agent cannot help and transfer is needed.
+
+Otherwise, flight can be cancelled if any of the following is true:
+- The booking was made within the last 24 hrs
+- The flight is cancelled by airline
+- It is a business flight
+- The user has travel insurance and the reason for cancellation is covered by insurance.
+
+The API does not check that cancellation rules are met, so the agent must make sure the rules apply before calling the API!
+
+Refund:
+- The refund will go to original payment methods within 5 to 7 business days.
+
+## Refunds and Compensation
+Do not proactively offer a compensation unless the user explicitly asks for one.
+
+Do not compensate if the user is a regular member and has no travel insurance and flies (basic) economy.
+
+Always confirm the facts before offering compensation.
+
+Only compensate if the user is a silver/gold member or has travel insurance or flies business.
+
+- If the user complains about cancelled flights in a reservation, the agent can offer a certificate as a gesture after confirming the facts, with the amount being $100 times the number of passengers.
+
+- If the user complains about delayed flights in a reservation and wants to change or cancel the reservation, the agent can offer a certificate as a gesture after confirming the facts and changing or cancelling the reservation, with the amount being $50 times the number of passengers.
+
+Do not offer compensation for any other reason than the ones listed above.
+
+
diff --git a/sample_solutions/AgenticCodeExecution/examples/airline/airline_data_model.py b/sample_solutions/AgenticCodeExecution/examples/airline/airline_data_model.py
new file mode 100644
index 00000000..4a265c49
--- /dev/null
+++ b/sample_solutions/AgenticCodeExecution/examples/airline/airline_data_model.py
@@ -0,0 +1,335 @@
+"""Data models for the airline domain - Standalone version without tau2 dependencies."""
+
+from __future__ import annotations
+
+import json
+from pathlib import Path
+from typing import Annotated, Any, Dict, List, Literal, Optional, Union
+
+from pydantic import BaseModel, Field
+
+FlightType = Literal["round_trip", "one_way"]
+CabinClass = Literal["business", "economy", "basic_economy"]
+Insurance = Literal["yes", "no"]
+
+
+MembershipLevel = Annotated[
+ Literal["gold", "silver", "regular"], Field(description="Membership level")
+]
+
+
+class DB(BaseModel):
+ """Base persistent database model with load/save helpers."""
+
+ model_config = {"extra": "allow"}
+ _db_path: str = ""
+
+ @classmethod
+ def load(cls, path: str | Path):
+ with open(path, "r", encoding="utf-8") as f:
+ data = json.load(f)
+ db = cls.model_validate(data)
+ db._db_path = str(path)
+ return db
+
+ def save(self) -> None:
+ if self._db_path:
+ with open(self._db_path, "w", encoding="utf-8") as f:
+ json.dump(self.model_dump(exclude={"_db_path"}, mode="json"), f, indent=2)
+
+
+class AirportCode(BaseModel):
+ """Airport location metadata represented by IATA code and city."""
+
+ iata: str = Field(description="IATA code")
+ city: str = Field(description="City name")
+
+
+AirportInfo = Annotated[list[AirportCode], Field(description="Airport information")]
+
+
+class Name(BaseModel):
+ """Person full name."""
+
+ first_name: str = Field(description="The person's first name")
+ last_name: str = Field(description="The person's last name")
+
+
+class Address(BaseModel):
+ """Postal address details."""
+
+ address1: str = Field(description="Primary address line")
+ address2: Optional[str] = Field(None, description="Secondary address line (optional)")
+ city: str = Field(description="City name")
+ country: str = Field(description="Country name")
+ state: str = Field(description="State or province name")
+ zip: str = Field(description="Postal code")
+
+
+class Payment(BaseModel):
+ """Payment transaction record for a reservation."""
+
+ payment_id: str = Field(description="Unique identifier for the payment")
+ amount: int = Field(description="Payment amount in dollars")
+
+
+class PaymentMethodBase(BaseModel):
+ """Base payment method identifier shared by all payment sources."""
+
+ source: str = Field(description="Type of payment method")
+ id: str = Field(description="Unique identifier for the payment method")
+
+
+class CreditCard(PaymentMethodBase):
+ """Credit card payment source."""
+
+ source: Literal["credit_card"] = Field(
+ description="Indicates this is a credit card payment method"
+ )
+ brand: str = Field(description="Credit card brand (e.g., visa, mastercard)")
+ last_four: str = Field(description="Last four digits of the credit card")
+
+
+class GiftCard(PaymentMethodBase):
+ """Gift card payment source."""
+
+ source: Literal["gift_card"] = Field(
+ description="Indicates this is a gift card payment method"
+ )
+ amount: float = Field(description="Gift card value amount")
+ id: str = Field(description="Unique identifier for the gift card")
+
+
+class Certificate(PaymentMethodBase):
+ """Travel certificate payment source."""
+
+ source: Literal["certificate"] = Field(
+ description="Indicates this is a certificate payment method"
+ )
+ amount: float = Field(description="Certificate value amount")
+
+
+PaymentMethod = Union[CreditCard, GiftCard, Certificate]
+
+
+class Passenger(BaseModel):
+ """Passenger profile used for bookings and saved travelers."""
+
+ first_name: str = Field(description="Passenger's first name")
+ last_name: str = Field(description="Passenger's last name")
+ dob: str = Field(description="Date of birth in YYYY-MM-DD format")
+
+
+SeatPrices = Annotated[
+ dict[CabinClass, int], Field(description="Prices for different cabin classes")
+]
+AvailableSeats = Annotated[
+ dict[CabinClass, int],
+ Field(description="Available seats for different cabin classes"),
+]
+
+
+class FlightDateStatusAvailable(BaseModel):
+ """Flight instance status when seats are available for booking."""
+
+ status: Literal["available"] = Field(
+ description="Indicates flight is available for booking"
+ )
+ available_seats: AvailableSeats = Field(description="Available seats by class")
+ prices: SeatPrices = Field(description="Current prices by class")
+
+
+class FlightDataStatusOnTime(BaseModel):
+ """Flight status for an on-time active flight day."""
+
+ status: Literal["on time"] = Field(description="Indicates flight is on time")
+ estimated_departure_time_est: str = Field(
+ description="Estimated departure time in EST in the format YYYY-MM-DDTHH:MM:SS, e.g 2024-05-15T06:04:00"
+ )
+ estimated_arrival_time_est: str = Field(
+ description="Estimated arrival time in EST in the format YYYY-MM-DDTHH:MM:SS, e.g 2024-05-15T07:30:00"
+ )
+
+
+class FlightDataStatusFlying(BaseModel):
+ """Flight status for an in-progress flight."""
+
+ status: Literal["flying"] = Field(description="Indicates flight is in flight")
+ actual_departure_time_est: str = Field(
+ description="Actual departure time in EST in the format YYYY-MM-DDTHH:MM:SS, e.g 2024-05-15T06:04:00"
+ )
+ estimated_arrival_time_est: str = Field(
+ description="Estimated arrival time in EST in the format YYYY-MM-DDTHH:MM:SS, e.g 2024-05-15T07:30:00"
+ )
+
+
+class FlightDateStatusLanded(BaseModel):
+ """Flight status after arrival."""
+
+ status: Literal["landed"] = Field(description="Indicates flight has landed")
+ actual_departure_time_est: str = Field(
+ description="Actual departure time in EST in the format YYYY-MM-DDTHH:MM:SS, e.g 2024-05-15T06:04:00"
+ )
+ actual_arrival_time_est: str = Field(
+ description="Actual arrival time in EST in the format YYYY-MM-DDTHH:MM:SS, e.g 2024-05-15T07:30:00"
+ )
+
+
+class FlightDateStatusCancelled(BaseModel):
+ """Flight status for a cancelled flight day."""
+
+ status: Literal["cancelled"] = Field(description="Indicates flight was cancelled")
+
+
+class FlightDateStatusDelayed(BaseModel):
+ """Flight status for a delayed flight day."""
+
+ status: Literal["delayed"] = Field(description="Indicates flight was delayed")
+ estimated_departure_time_est: str = Field(
+ description="Estimated departure time in EST in the format YYYY-MM-DDTHH:MM:SS, e.g 2024-05-15T06:04:00"
+ )
+ estimated_arrival_time_est: str = Field(
+ description="Estimated arrival time in EST in the format YYYY-MM-DDTHH:MM:SS, e.g 2024-05-15T07:30:00"
+ )
+
+
+FlightDateStatus = Union[
+ FlightDateStatusAvailable,
+ FlightDateStatusLanded,
+ FlightDateStatusCancelled,
+ FlightDateStatusDelayed,
+ FlightDataStatusFlying,
+ FlightDataStatusOnTime,
+]
+
+
+class FlightBase(BaseModel):
+ """Shared identifying fields for flights."""
+
+ flight_number: str = Field(description="Unique flight identifier")
+ origin: str = Field(description="IATA code for origin airport")
+ destination: str = Field(description="IATA code for destination airport")
+
+
+class Flight(FlightBase):
+ """Scheduled flight with date-specific operational statuses."""
+
+ scheduled_departure_time_est: str = Field(
+ description="Scheduled departure time in EST in the format HH:MM:SS, e.g 06:00:00"
+ )
+ scheduled_arrival_time_est: str = Field(
+ description="Scheduled arrival time in EST in the format HH:MM:SS, e.g 07:00:00"
+ )
+ dates: Dict[str, FlightDateStatus] = Field(
+ description="Flight status by date (YYYY-MM-DD)"
+ )
+
+
+class DirectFlight(FlightBase):
+ """Search result view of a directly available flight option."""
+
+ status: Literal["available"] = Field(
+ description="Indicates flight is available for booking"
+ )
+ scheduled_departure_time_est: str = Field(
+ description="Scheduled departure time in EST in the format HH:MM:SS, e.g 06:00:00"
+ )
+ scheduled_arrival_time_est: str = Field(
+ description="Scheduled arrival time in EST in the format HH:MM:SS, e.g 07:00:00"
+ )
+ date: Optional[str] = Field(description="Flight date in YYYY-MM-DD format", default=None)
+ available_seats: AvailableSeats = Field(description="Available seats by class")
+ prices: SeatPrices = Field(description="Current prices by class")
+
+
+class ReservationFlight(FlightBase):
+ """Flight leg stored as part of a reservation."""
+
+ date: str = Field(description="Flight date in YYYY-MM-DD format")
+ price: int = Field(description="Flight price in dollars.")
+
+
+class FlightInfo(BaseModel):
+ """Flight lookup input pair for number and date."""
+
+ flight_number: str = Field(description="Flight number, such as 'HAT001'.")
+ date: str = Field(
+ description="The date for the flight in the format 'YYYY-MM-DD', such as '2024-05-01'."
+ )
+
+
+class User(BaseModel):
+ """Airline user profile with payment methods, passengers, and reservations."""
+
+ user_id: str = Field(description="Unique identifier for the user")
+ name: Name = Field(description="User's full name")
+ address: Address = Field(description="User's address information")
+ email: str = Field(description="User's email address")
+ dob: str = Field(
+ description="User's date of birth in the format YYYY-MM-DD, e.g 1990-04-05"
+ )
+ payment_methods: Dict[str, PaymentMethod] = Field(
+ description="User's saved payment methods"
+ )
+ saved_passengers: List[Passenger] = Field(
+ description="User's saved passenger information"
+ )
+ membership: MembershipLevel = Field(description="User's membership level")
+ reservations: List[str] = Field(description="List of user's reservation IDs")
+
+
+class Reservation(BaseModel):
+ """Reservation record including itinerary, passengers, and payments."""
+
+ reservation_id: str = Field(description="Unique identifier for the reservation")
+ user_id: str = Field(description="ID of the user who made the reservation")
+ origin: str = Field(description="IATA code for trip origin")
+ destination: str = Field(description="IATA code for trip destination")
+ flight_type: FlightType = Field(description="Type of trip")
+ cabin: CabinClass = Field(description="Selected cabin class")
+ flights: List[ReservationFlight] = Field(
+ description="List of flights in the reservation"
+ )
+ passengers: List[Passenger] = Field(
+ description="List of passengers on the reservation"
+ )
+ payment_history: List[Payment] = Field(
+ description="History of payments for this reservation"
+ )
+ created_at: str = Field(
+ description="Timestamp when reservation was created in the format YYYY-MM-DDTHH:MM:SS"
+ )
+ total_baggages: int = Field(description="Total number of bags in reservation")
+ nonfree_baggages: int = Field(description="Number of paid bags in reservation")
+ insurance: Insurance = Field(description="Whether travel insurance was purchased")
+ status: Optional[Literal["cancelled"]] = Field(
+ description="Status of the reservation", default=None
+ )
+
+
+class FlightDB(DB):
+ """Database containing all airline flights, users, and reservations."""
+
+ flights: Dict[str, Flight] = Field(
+ description="Dictionary of all flights indexed by flight number"
+ )
+ users: Dict[str, User] = Field(
+ description="Dictionary of all users indexed by user ID"
+ )
+ reservations: Dict[str, Reservation] = Field(
+ description="Dictionary of all reservations indexed by reservation ID"
+ )
+
+ def get_statistics(self) -> dict[str, Any]:
+ num_flights = len(self.flights)
+ num_flights_instances = sum(
+ len(flight.dates) for flight in self.flights.values()
+ )
+ num_users = len(self.users)
+ num_reservations = len(self.reservations)
+ return {
+ "num_flights": num_flights,
+ "num_flights_instances": num_flights_instances,
+ "num_users": num_users,
+ "num_reservations": num_reservations,
+ }
diff --git a/sample_solutions/AgenticCodeExecution/examples/airline/data/.gitkeep b/sample_solutions/AgenticCodeExecution/examples/airline/data/.gitkeep
new file mode 100644
index 00000000..e69de29b
diff --git a/sample_solutions/AgenticCodeExecution/examples/airline/mcp_airline_server.py b/sample_solutions/AgenticCodeExecution/examples/airline/mcp_airline_server.py
new file mode 100644
index 00000000..c1721296
--- /dev/null
+++ b/sample_solutions/AgenticCodeExecution/examples/airline/mcp_airline_server.py
@@ -0,0 +1,944 @@
+#!/usr/bin/env python3
+"""
+MCP Server for Airline Tools - Fully Standalone
+
+All business logic is directly in the MCP tools - no intermediate wrapper classes.
+"""
+
+import argparse
+import json
+import os
+import sys
+import urllib.request
+from copy import deepcopy
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+
+from fastmcp import FastMCP
+
+# Add parent directory to sys.path for shared modules (error_hints)
+sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
+
+from airline_data_model import (
+ AirportCode,
+ CabinClass,
+ Certificate,
+ DirectFlight,
+ FlightDB,
+ FlightDateStatusAvailable,
+ FlightInfo,
+ Insurance,
+ Passenger,
+ Payment,
+ Reservation,
+ ReservationFlight,
+ User,
+)
+from error_hints import analyze_execution_error
+
+
+DEFAULT_DB_PATH = str(Path(__file__).resolve().parent / "data" / "db.json")
+
+TAU2_BENCH_URL = (
+ "https://raw.githubusercontent.com/sierra-research/tau2-bench/"
+ "main/data/tau2/domains/airline/db.json"
+)
+
+
+def ensure_db(db_path: str) -> None:
+ """Check that the airline database exists; auto-download from tau2-bench if missing."""
+ p = Path(db_path)
+ if p.exists():
+ return
+ print(f"⚠️ Database not found: {db_path}")
+ print(f" Downloading from tau2-bench …")
+ p.parent.mkdir(parents=True, exist_ok=True)
+ try:
+ urllib.request.urlretrieve(TAU2_BENCH_URL, str(p))
+ print(f" ✅ Downloaded ({p.stat().st_size / 1_048_576:.1f} MB)")
+ except Exception as exc:
+ print(f" ❌ Download failed: {exc}")
+ print(f" Please download manually:")
+ print(f" curl -L -o {db_path} {TAU2_BENCH_URL}")
+ sys.exit(1)
+
+
+mcp = FastMCP(
+ "Airline Tools Server",
+ instructions="""You are an airline customer service agent. Use these tools to help customers with:
+- Looking up users and reservation details
+- Searching direct and one-stop flight options
+- Booking, updating, and cancelling reservations
+- Managing baggage and passenger details
+- Checking flight status
+
+Always verify identity and reservation details before making changes. Ask for confirmation before mutations.""",
+)
+
+
+# Global database state
+_db: Optional[FlightDB] = None # Read-only template DB
+_original_db_path: str = "" # Path to original pristine DB file
+_session_dbs: Dict[str, FlightDB] = {} # Per-session DB copies
+SESSION_DB_DIR = Path(__file__).resolve().parent.parent / "session_dbs"
+SESSION_DB_DIR.mkdir(exist_ok=True)
+
+
+def get_db(session_id: str = "") -> FlightDB:
+ """Get the database for a given session.
+
+ If session_id is empty, returns the read-only template DB.
+ If session_id is provided, returns a per-session pristine copy
+ (created on first access from the original file).
+ """
+ global _db, _original_db_path
+
+ if _db is None:
+ db_path = os.environ.get("AIRLINE_DB_PATH", DEFAULT_DB_PATH)
+ _original_db_path = db_path
+ _db = FlightDB.load(db_path)
+ _db._db_path = "" # Prevent accidental writes to original file
+ print(f"Loaded template airline database from {db_path}")
+ print(f" - {len(_db.flights)} flights")
+ print(f" - {len(_db.users)} users")
+ print(f" - {len(_db.reservations)} reservations")
+
+ if not session_id:
+ return _db
+
+ if session_id not in _session_dbs:
+ db = FlightDB.load(_original_db_path)
+ session_db_file = SESSION_DB_DIR / f"{session_id[:32]}.json"
+ db._db_path = str(session_db_file)
+ _session_dbs[session_id] = db
+ print(
+ f"🆕 Created pristine airline DB for session {session_id[:8]}... "
+ f"({len(_session_dbs)} active sessions)"
+ )
+
+ return _session_dbs[session_id]
+
+
+def _serialize(value):
+ if hasattr(value, "model_dump_json"):
+ return value.model_dump_json(indent=2)
+ if isinstance(value, list):
+ normalized = []
+ for item in value:
+ if hasattr(item, "model_dump"):
+ normalized.append(item.model_dump(mode="json"))
+ elif isinstance(item, list):
+ normalized.append(
+ [
+ sub.model_dump(mode="json") if hasattr(sub, "model_dump") else sub
+ for sub in item
+ ]
+ )
+ else:
+ normalized.append(item)
+ return json.dumps(normalized, indent=2)
+ return value
+
+
+def _get_user(db: FlightDB, user_id: str):
+ if user_id not in db.users:
+ raise ValueError(f"User {user_id} not found")
+ return db.users[user_id]
+
+
+def _get_reservation(db: FlightDB, reservation_id: str):
+ if reservation_id not in db.reservations:
+ raise ValueError(f"Reservation {reservation_id} not found")
+ return db.reservations[reservation_id]
+
+
+def _get_flight(db: FlightDB, flight_number: str):
+ if flight_number not in db.flights:
+ raise ValueError(f"Flight {flight_number} not found")
+ return db.flights[flight_number]
+
+
+def _get_flight_instance(db: FlightDB, flight_number: str, date: str):
+ flight = _get_flight(db, flight_number)
+ if date not in flight.dates:
+ raise ValueError(f"Flight {flight_number} not found on date {date}")
+ return flight.dates[date]
+
+
+def _get_flights_from_flight_infos(db: FlightDB, flight_infos: List[FlightInfo]) -> list:
+ flights = []
+ for flight_info in flight_infos:
+ flights.append(_get_flight_instance(db, flight_info.flight_number, flight_info.date))
+ return flights
+
+
+def _get_new_reservation_id(db: FlightDB) -> str:
+ for reservation_id in ["HATHAT", "HATHAU", "HATHAV"]:
+ if reservation_id not in db.reservations:
+ return reservation_id
+ raise ValueError("Too many reservations")
+
+
+def _get_new_payment_id() -> List[int]:
+ return [3221322, 3221323, 3221324]
+
+
+def _get_datetime() -> str:
+ return "2024-05-15T15:00:00"
+
+
+def _search_direct_flight(
+ db: FlightDB,
+ date: str,
+ origin: Optional[str] = None,
+ destination: Optional[str] = None,
+ leave_after: Optional[str] = None,
+) -> list[DirectFlight]:
+ results = []
+ for flight in db.flights.values():
+ check = (
+ (origin is None or flight.origin == origin)
+ and (destination is None or flight.destination == destination)
+ and (date in flight.dates)
+ and (flight.dates[date].status == "available")
+ and (
+ leave_after is None
+ or flight.scheduled_departure_time_est >= leave_after
+ )
+ )
+ if check:
+ direct_flight = DirectFlight(
+ flight_number=flight.flight_number,
+ origin=flight.origin,
+ destination=flight.destination,
+ status="available",
+ scheduled_departure_time_est=flight.scheduled_departure_time_est,
+ scheduled_arrival_time_est=flight.scheduled_arrival_time_est,
+ available_seats=flight.dates[date].available_seats,
+ prices=flight.dates[date].prices,
+ )
+ results.append(direct_flight)
+ return results
+
+
+def _payment_for_update(db: FlightDB, user_id: str, payment_id: str, total_price: int) -> Optional[Payment]:
+ user = _get_user(db, user_id)
+
+ if payment_id not in user.payment_methods:
+ raise ValueError("Payment method not found")
+ payment_method = user.payment_methods[payment_id]
+
+ if payment_method.source == "certificate":
+ raise ValueError("Certificate cannot be used to update reservation")
+ elif payment_method.source == "gift_card" and payment_method.amount < total_price:
+ raise ValueError("Gift card balance is not enough")
+
+ if payment_method.source == "gift_card":
+ payment_method.amount -= total_price
+
+ if total_price == 0:
+ return None
+ return Payment(payment_id=payment_id, amount=total_price)
+
+
+_ALLOWED_CABINS = {"business", "economy", "basic_economy"}
+
+
+def _normalize_cabin(cabin: str) -> str:
+ if not isinstance(cabin, str) or not cabin.strip():
+ raise ValueError("Cabin must be a non-empty string")
+ normalized = cabin.strip().lower().replace("-", "_").replace(" ", "_")
+ if normalized == "basiceconomy":
+ normalized = "basic_economy"
+ if normalized not in _ALLOWED_CABINS:
+ raise ValueError(
+ f"Invalid cabin '{cabin}'. Expected one of: business, economy, basic_economy"
+ )
+ return normalized
+
+
+def _resolve_payment_id(payment_id: str = "", payment_method_id: str = "") -> str:
+ resolved = payment_id or payment_method_id
+ if not resolved:
+ raise ValueError("payment_id is required")
+ return resolved
+
+
+def _serialize_reservation_with_aliases(reservation: Reservation) -> str:
+ data = reservation.model_dump(mode="json")
+ payment_history = data.get("payment_history", [])
+ if isinstance(payment_history, list):
+ for entry in payment_history:
+ if (
+ isinstance(entry, dict)
+ and "payment_id" in entry
+ and "payment_method_id" not in entry
+ ):
+ entry["payment_method_id"] = entry["payment_id"]
+ return json.dumps(data, indent=2)
+
+
+def _serialize_user_with_aliases(user) -> str:
+ data = user.model_dump(mode="json")
+ payment_methods = data.get("payment_methods", {})
+ if isinstance(payment_methods, dict):
+ for _, method in payment_methods.items():
+ if isinstance(method, dict) and "source" in method and "type" not in method:
+ method["type"] = method["source"]
+ return json.dumps(data, indent=2)
+
+
+def _get_data_model_defs() -> Dict[str, dict]:
+ model_classes = [
+ AirportCode,
+ Certificate,
+ DirectFlight,
+ FlightDateStatusAvailable,
+ FlightInfo,
+ Passenger,
+ Payment,
+ ReservationFlight,
+ Reservation,
+ User,
+ ]
+ defs: Dict[str, dict] = {}
+ for model_cls in model_classes:
+ model_name = getattr(model_cls, "__name__", str(model_cls))
+ model_json_schema = getattr(model_cls, "model_json_schema", None)
+ if not callable(model_json_schema):
+ continue
+ schema = model_json_schema(ref_template="#/$defs/{model}")
+ defs[model_name] = {
+ "description": schema.get("description", ""),
+ "properties": schema.get("properties", {}),
+ }
+ return defs
+
+
+def _get_tool_metadata_payload() -> Dict[str, Any]:
+ ordered_actions = [
+ "book_reservation",
+ "calculate",
+ "cancel_reservation",
+ "get_flight_status",
+ "get_reservation_details",
+ "get_user_details",
+ "list_all_airports",
+ "search_direct_flight",
+ "search_onestop_flight",
+ "send_certificate",
+ "transfer_to_human_agents",
+ "update_reservation_baggages",
+ "update_reservation_flights",
+ "update_reservation_passengers",
+ ]
+
+ return {
+ "ordered_actions": ordered_actions,
+ "return_types": {
+ "book_reservation": "str (JSON)",
+ "calculate": "str",
+ "cancel_reservation": "str (JSON)",
+ "get_flight_status": "str",
+ "get_reservation_details": "str (JSON)",
+ "get_user_details": "str (JSON)",
+ "list_all_airports": "str (JSON)",
+ "search_direct_flight": "str (JSON)",
+ "search_onestop_flight": "str (JSON)",
+ "send_certificate": "str",
+ "transfer_to_human_agents": "str",
+ "update_reservation_baggages": "str (JSON)",
+ "update_reservation_flights": "str (JSON)",
+ "update_reservation_passengers": "str (JSON)",
+ },
+ "semantic_types": {
+ "book_reservation": "Reservation",
+ "cancel_reservation": "Reservation",
+ "get_reservation_details": "Reservation",
+ "get_user_details": "User",
+ "list_all_airports": "list[AirportCode]",
+ "search_direct_flight": "list[DirectFlight]",
+ "search_onestop_flight": "list[list[DirectFlight]]",
+ "update_reservation_baggages": "Reservation",
+ "update_reservation_flights": "Reservation",
+ "update_reservation_passengers": "Reservation",
+ },
+ "data_model_defs": _get_data_model_defs(),
+ }
+
+
+# ==================== READ / GENERIC TOOLS ====================
+
+@mcp.tool()
+def calculate(expression: str, session_id: str = "") -> str:
+ """
+ Calculate the result of a mathematical expression.
+
+ Args:
+ expression: The mathematical expression to calculate, such as '2 + 2'. The expression can contain numbers, operators (+, -, *, /), parentheses, and spaces.
+
+ Returns:
+ str: The result of the mathematical expression as a string.
+
+ Raises:
+ ValueError: If the expression is invalid.
+ """
+ if not all(char in "0123456789+-*/(). " for char in expression):
+ raise ValueError("Invalid characters in expression")
+ return str(round(float(eval(expression, {"__builtins__": None}, {})), 2))
+
+
+@mcp.tool()
+def get_reservation_details(reservation_id: str, session_id: str = "") -> str:
+ """
+ Get the details of a reservation.
+
+ This is the primary lookup method before any reservation write action.
+ Use this to verify current status, cabin, flights, passengers, and payment history.
+
+ Args:
+ reservation_id: The reservation ID, such as '8JX2WO'.
+
+ Returns:
+ str: The reservation details serialized as a JSON string.
+ """
+ db = get_db(session_id)
+ return _serialize_reservation_with_aliases(_get_reservation(db, reservation_id))
+
+
+@mcp.tool()
+def get_user_details(user_id: str, session_id: str = "") -> str:
+ """
+ Get the details of a user, including their reservations and payment methods.
+
+ Args:
+ user_id: The user ID, such as 'sara_doe_496'.
+
+ Returns:
+ str: The user details serialized as a JSON string.
+ """
+ db = get_db(session_id)
+ return _serialize_user_with_aliases(_get_user(db, user_id))
+
+
+@mcp.tool()
+def list_all_airports(session_id: str = "") -> str:
+ """
+ Returns a list of all available airports.
+
+ Returns:
+ str: The airport list serialized as a JSON string.
+ """
+ airports = [
+ AirportCode(iata="SFO", city="San Francisco"),
+ AirportCode(iata="JFK", city="New York"),
+ AirportCode(iata="LAX", city="Los Angeles"),
+ AirportCode(iata="ORD", city="Chicago"),
+ AirportCode(iata="DFW", city="Dallas"),
+ AirportCode(iata="DEN", city="Denver"),
+ AirportCode(iata="SEA", city="Seattle"),
+ AirportCode(iata="ATL", city="Atlanta"),
+ AirportCode(iata="MIA", city="Miami"),
+ AirportCode(iata="BOS", city="Boston"),
+ AirportCode(iata="PHX", city="Phoenix"),
+ AirportCode(iata="IAH", city="Houston"),
+ AirportCode(iata="LAS", city="Las Vegas"),
+ AirportCode(iata="MCO", city="Orlando"),
+ AirportCode(iata="EWR", city="Newark"),
+ AirportCode(iata="CLT", city="Charlotte"),
+ AirportCode(iata="MSP", city="Minneapolis"),
+ AirportCode(iata="DTW", city="Detroit"),
+ AirportCode(iata="PHL", city="Philadelphia"),
+ AirportCode(iata="LGA", city="LaGuardia"),
+ ]
+ return _serialize(airports)
+
+
+@mcp.tool()
+def search_direct_flight(origin: str, destination: str, date: str, session_id: str = "") -> str:
+ """
+ Search for direct flights between two cities on a specific date.
+
+ Args:
+ origin: IATA code for origin airport, such as 'JFK'.
+ destination: IATA code for destination airport, such as 'LAX'.
+ date: Date in YYYY-MM-DD format.
+
+ Returns:
+ str: Matching direct flights serialized as a JSON string.
+ """
+ db = get_db(session_id)
+ return _serialize(_search_direct_flight(db, date=date, origin=origin, destination=destination))
+
+
+@mcp.tool()
+def search_onestop_flight(origin: str, destination: str, date: str, session_id: str = "") -> str:
+ """
+ Search for one-stop flights between two cities on a specific date.
+
+ Args:
+ origin: IATA code for origin airport, such as 'JFK'.
+ destination: IATA code for destination airport, such as 'LAX'.
+ date: Date in YYYY-MM-DD format.
+
+ Returns:
+ str: Candidate one-stop itineraries serialized as a JSON string.
+ """
+ db = get_db(session_id)
+ results = []
+ for result1 in _search_direct_flight(db, date=date, origin=origin, destination=None):
+ result1.date = date
+ date2 = (
+ f"2024-05-{int(date[-2:]) + 1}"
+ if "+1" in result1.scheduled_arrival_time_est
+ else date
+ )
+ for result2 in _search_direct_flight(
+ db,
+ date=date2,
+ origin=result1.destination,
+ destination=destination,
+ leave_after=result1.scheduled_arrival_time_est,
+ ):
+ result2.date = date2
+ results.append([result1, result2])
+ return _serialize(results)
+
+
+@mcp.tool()
+def get_flight_status(flight_number: str, date: str, session_id: str = "") -> str:
+ """
+ Get the status of a flight.
+
+ Args:
+ flight_number: The flight number.
+ date: The date of the flight.
+
+ Returns:
+ str: The status of the flight.
+ """
+ db = get_db(session_id)
+ return _get_flight_instance(db, flight_number, date).status
+
+
+@mcp.tool()
+def transfer_to_human_agents(summary: str, session_id: str = "") -> str:
+ """
+ Transfer the user to a human agent with a summary.
+
+ Args:
+ summary: Concise factual summary of issue and attempts.
+
+ Returns:
+ str: Confirmation string.
+ """
+ return "Transfer successful"
+
+
+@mcp.tool()
+def get_execution_error_hint(error_msg: str, code: str = "", session_id: str = "") -> str:
+ """
+ Return a recovery hint for sandbox execution/tool errors.
+
+ Args:
+ error_msg: The root error message produced by sandbox/tool execution.
+ code: The executed python code snippet (optional, used for pattern detection).
+
+ Returns:
+ str: A concise hint string. Empty string if no specific hint applies.
+ """
+ return analyze_execution_error(error_msg=error_msg, code=code, domain="airline")
+
+
+@mcp.tool()
+def get_tool_metadata(session_id: str = "") -> str:
+ """Return metadata used to build execute_python action/data-model description.
+
+ Returns:
+ JSON string with keys like return_types and data_model_defs.
+ """
+ return json.dumps(_get_tool_metadata_payload())
+
+
+# ==================== WRITE TOOLS ====================
+
+@mcp.tool()
+def send_certificate(user_id: str, amount: int, session_id: str = "") -> str:
+ """
+ Send a certificate to a user.
+
+ Args:
+ user_id: User ID such as 'sara_doe_496'.
+ amount: Certificate amount.
+
+ Returns:
+ str: Confirmation message.
+ """
+ db = get_db(session_id)
+ user = _get_user(db, user_id)
+ for payment_id in [f"certificate_{id}" for id in _get_new_payment_id()]:
+ if payment_id not in user.payment_methods:
+ user.payment_methods[payment_id] = Certificate(
+ id=payment_id,
+ amount=amount,
+ source="certificate",
+ )
+ db.save()
+ return f"Certificate {payment_id} added to user {user_id} with amount {amount}."
+ raise ValueError("Too many certificates")
+
+
+@mcp.tool()
+def book_reservation(
+ user_id: str,
+ origin: str,
+ destination: str,
+ flight_type: str,
+ cabin: str,
+ flights: List[dict],
+ passengers: List[dict],
+ payment_methods: List[dict],
+ total_baggages: int,
+ nonfree_baggages: int,
+ insurance: str,
+ session_id: str = "",
+) -> str:
+ """
+ Book a reservation.
+
+ Args:
+ user_id: User ID such as 'sara_doe_496'.
+ origin: Origin IATA code.
+ destination: Destination IATA code.
+ flight_type: 'one_way' or 'round_trip'.
+ cabin: Cabin class.
+ flights: List of {flight_number, date} objects.
+ passengers: List of passenger objects.
+ payment_methods: List of payment objects.
+ total_baggages: Total baggage count.
+ nonfree_baggages: Non-free baggage count.
+ insurance: 'yes' or 'no'.
+
+ Returns:
+ str: Reservation details serialized as a JSON string.
+ """
+ db = get_db(session_id)
+ user = _get_user(db, user_id)
+ reservation_id = _get_new_reservation_id(db)
+ cabin = _normalize_cabin(cabin)
+
+ if all(isinstance(flight, dict) for flight in flights):
+ flights = [FlightInfo(**flight) for flight in flights]
+ if all(isinstance(passenger, dict) for passenger in passengers):
+ passengers = [Passenger(**passenger) for passenger in passengers]
+ if all(isinstance(payment_method, dict) for payment_method in payment_methods):
+ normalized_payment_methods = []
+ for payment_method in payment_methods:
+ method = dict(payment_method)
+ if "payment_id" not in method and "payment_method_id" in method:
+ method["payment_id"] = method["payment_method_id"]
+ normalized_payment_methods.append(method)
+ payment_methods = normalized_payment_methods
+ payment_methods = [Payment(**payment_method) for payment_method in payment_methods]
+
+ reservation = Reservation(
+ reservation_id=reservation_id,
+ user_id=user_id,
+ origin=origin,
+ destination=destination,
+ flight_type=flight_type,
+ cabin=cabin,
+ flights=[],
+ passengers=deepcopy(passengers),
+ payment_history=deepcopy(payment_methods),
+ created_at=_get_datetime(),
+ total_baggages=total_baggages,
+ nonfree_baggages=nonfree_baggages,
+ insurance=insurance,
+ )
+
+ total_price = 0
+ all_flights_date_data: list[FlightDateStatusAvailable] = []
+
+ for flight_info in flights:
+ flight_number = flight_info.flight_number
+ flight = _get_flight(db, flight_number)
+ flight_date_data = _get_flight_instance(db, flight_number, flight_info.date)
+
+ if not isinstance(flight_date_data, FlightDateStatusAvailable):
+ raise ValueError(
+ f"Flight {flight_number} not available on date {flight_info.date}"
+ )
+ if flight_date_data.available_seats[cabin] < len(passengers):
+ raise ValueError(f"Not enough seats on flight {flight_number}")
+
+ price = flight_date_data.prices[cabin]
+ reservation.flights.append(
+ ReservationFlight(
+ origin=flight.origin,
+ destination=flight.destination,
+ flight_number=flight_number,
+ date=flight_info.date,
+ price=price,
+ )
+ )
+ all_flights_date_data.append(flight_date_data)
+ total_price += price * len(passengers)
+
+ if insurance == "yes":
+ total_price += 30 * len(passengers)
+ total_price += 50 * nonfree_baggages
+
+ for payment_method in payment_methods:
+ payment_id = payment_method.payment_id
+ amount = payment_method.amount
+ if payment_id not in user.payment_methods:
+ raise ValueError(f"Payment method {payment_id} not found")
+ user_payment_method = user.payment_methods[payment_id]
+ if user_payment_method.source in {"gift_card", "certificate"}:
+ if user_payment_method.amount < amount:
+ raise ValueError(f"Not enough balance in payment method {payment_id}")
+
+ total_payment = sum(payment.amount for payment in payment_methods)
+ if total_payment != total_price:
+ raise ValueError(
+ f"Payment amount does not add up, total price is {total_price}, but paid {total_payment}"
+ )
+
+ for payment_method in payment_methods:
+ payment_id = payment_method.payment_id
+ amount = payment_method.amount
+ user_payment_method = user.payment_methods[payment_id]
+ if user_payment_method.source == "gift_card":
+ user_payment_method.amount -= amount
+ elif user_payment_method.source == "certificate":
+ user.payment_methods.pop(payment_id)
+
+ for flight_date_data in all_flights_date_data:
+ flight_date_data.available_seats[cabin] -= len(passengers)
+
+ db.reservations[reservation_id] = reservation
+ db.users[user_id].reservations.append(reservation_id)
+ db.save()
+ return _serialize_reservation_with_aliases(reservation)
+
+
+@mcp.tool()
+def cancel_reservation(reservation_id: str, session_id: str = "") -> str:
+ """
+ Cancel the whole reservation.
+
+ Args:
+ reservation_id: Reservation ID such as 'ZFA04Y'.
+
+ Returns:
+ str: Updated reservation serialized as a JSON string.
+ """
+ db = get_db(session_id)
+ reservation = _get_reservation(db, reservation_id)
+
+ refunds = []
+ for payment in reservation.payment_history:
+ refunds.append(Payment(payment_id=payment.payment_id, amount=-payment.amount))
+ reservation.payment_history.extend(refunds)
+ reservation.status = "cancelled"
+
+ db.save()
+ return _serialize_reservation_with_aliases(reservation)
+
+
+@mcp.tool()
+def update_reservation_baggages(
+ reservation_id: str,
+ total_baggages: int,
+ nonfree_baggages: int,
+ payment_id: str = "",
+ payment_method_id: str = "",
+ session_id: str = "",
+) -> str:
+ """
+ Update the baggage information of a reservation.
+
+ Args:
+ reservation_id: Reservation ID such as 'ZFA04Y'.
+ total_baggages: Final total baggage count.
+ nonfree_baggages: Final non-free baggage count.
+ payment_id: Payment method ID from the booking user.
+ payment_method_id: Alias for payment_id (accepted for compatibility).
+
+ Returns:
+ str: Updated reservation serialized as a JSON string.
+ """
+ db = get_db(session_id)
+ reservation = _get_reservation(db, reservation_id)
+ resolved_payment_id = _resolve_payment_id(payment_id, payment_method_id)
+
+ total_price = 50 * max(0, nonfree_baggages - reservation.nonfree_baggages)
+ payment = _payment_for_update(db, reservation.user_id, resolved_payment_id, total_price)
+ if payment is not None:
+ reservation.payment_history.append(payment)
+
+ reservation.total_baggages = total_baggages
+ reservation.nonfree_baggages = nonfree_baggages
+ db.save()
+ return _serialize_reservation_with_aliases(reservation)
+
+
+@mcp.tool()
+def update_reservation_flights(
+ reservation_id: str,
+ cabin: str,
+ flights: List[dict],
+ payment_id: str = "",
+ payment_method_id: str = "",
+ session_id: str = "",
+) -> str:
+ """
+ Update the flight information of a reservation.
+
+ IMPORTANT:
+ - Provide COMPLETE updated itinerary, not just changed legs.
+ - Use exact flight numbers/dates from search results.
+
+ Args:
+ reservation_id: Reservation ID such as 'ZFA04Y'.
+ cabin: Updated cabin class. Accepts business/economy/basic_economy (also tolerant to spaces/hyphens).
+ flights: Complete itinerary as list of {flight_number, date}.
+ payment_id: Payment method ID from booking user.
+ payment_method_id: Alias for payment_id (accepted for compatibility).
+
+ Returns:
+ str: Updated reservation serialized as a JSON string.
+ """
+ db = get_db(session_id)
+ reservation = _get_reservation(db, reservation_id)
+ user = _get_user(db, reservation.user_id)
+ cabin = _normalize_cabin(cabin)
+ resolved_payment_id = _resolve_payment_id(payment_id, payment_method_id)
+
+ if all(isinstance(flight, dict) for flight in flights):
+ flights = [FlightInfo(**flight) for flight in flights]
+
+ total_price = 0
+ reservation_flights = []
+ for flight_info in flights:
+ matching_reservation_flight = next(
+ (
+ reservation_flight
+ for reservation_flight in reservation.flights
+ if reservation_flight.flight_number == flight_info.flight_number
+ and reservation_flight.date == flight_info.date
+ and cabin == reservation.cabin
+ ),
+ None,
+ )
+ if matching_reservation_flight:
+ total_price += matching_reservation_flight.price * len(reservation.passengers)
+ reservation_flights.append(matching_reservation_flight)
+ continue
+
+ flight = _get_flight(db, flight_info.flight_number)
+ flight_date_data = _get_flight_instance(db, flight_info.flight_number, flight_info.date)
+ if not isinstance(flight_date_data, FlightDateStatusAvailable):
+ raise ValueError(
+ f"Flight {flight_info.flight_number} not available on date {flight_info.date}"
+ )
+ if flight_date_data.available_seats[cabin] < len(reservation.passengers):
+ raise ValueError(f"Not enough seats on flight {flight_info.flight_number}")
+
+ reservation_flight = ReservationFlight(
+ flight_number=flight_info.flight_number,
+ date=flight_info.date,
+ price=flight_date_data.prices[cabin],
+ origin=flight.origin,
+ destination=flight.destination,
+ )
+ total_price += reservation_flight.price * len(reservation.passengers)
+ reservation_flights.append(reservation_flight)
+
+ total_price -= sum(flight.price for flight in reservation.flights) * len(reservation.passengers)
+
+ payment = _payment_for_update(db, user.user_id, resolved_payment_id, total_price)
+ if payment is not None:
+ reservation.payment_history.append(payment)
+
+ reservation.flights = reservation_flights
+ reservation.cabin = cabin
+
+ db.save()
+ return _serialize_reservation_with_aliases(reservation)
+
+
+@mcp.tool()
+def update_reservation_passengers(
+ reservation_id: str,
+ passengers: List[dict],
+ session_id: str = "",
+) -> str:
+ """
+ Update the passenger information of a reservation.
+
+ Passenger count must exactly match existing reservation passenger count.
+
+ Args:
+ reservation_id: Reservation ID such as 'ZFA04Y'.
+ passengers: Full list of passenger objects.
+
+ Returns:
+ str: Updated reservation serialized as a JSON string.
+ """
+ db = get_db(session_id)
+ reservation = _get_reservation(db, reservation_id)
+
+ if all(isinstance(passenger, dict) for passenger in passengers):
+ passengers = [Passenger(**passenger) for passenger in passengers]
+ if len(passengers) != len(reservation.passengers):
+ raise ValueError("Number of passengers does not match")
+
+ reservation.passengers = deepcopy(passengers)
+ db.save()
+ return _serialize_reservation_with_aliases(reservation)
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser(description="Airline MCP Server")
+ parser.add_argument(
+ "--db-path",
+ default=DEFAULT_DB_PATH,
+ help="Path to the airline database JSON file",
+ )
+ parser.add_argument(
+ "--port",
+ type=int,
+ default=5052,
+ help="Port to run the SSE server on",
+ )
+ parser.add_argument(
+ "--host",
+ default="0.0.0.0",
+ help="Host to bind to",
+ )
+ parser.add_argument(
+ "--transport",
+ choices=["sse", "stdio"],
+ default="sse",
+ help="Transport protocol to use",
+ )
+
+ args = parser.parse_args()
+
+ os.environ["AIRLINE_DB_PATH"] = args.db_path
+
+ ensure_db(args.db_path)
+ get_db()
+ print(" Original DB file is READ-ONLY (per-session copies used for mutations)")
+ print(f" Session DB dir: {SESSION_DB_DIR}")
+
+ print("\n🚀 Starting Airline MCP Server...")
+ print(f" Transport: {args.transport}")
+ if args.transport == "sse":
+ print(f" Host: {args.host}")
+ print(f" Port: {args.port}")
+ print(f" SSE endpoint: http://{args.host}:{args.port}/sse")
+
+ mcp.run(transport=args.transport, host=args.host, port=args.port)
diff --git a/sample_solutions/AgenticCodeExecution/examples/banking/banking-system-prompt.txt b/sample_solutions/AgenticCodeExecution/examples/banking/banking-system-prompt.txt
new file mode 100644
index 00000000..e2fdac9e
--- /dev/null
+++ b/sample_solutions/AgenticCodeExecution/examples/banking/banking-system-prompt.txt
@@ -0,0 +1,399 @@
+
+You are a customer service agent that helps the user according to the policy provided below.
+
+============================================================
+SECTION 1 — GENERIC EXECUTE_PYTHON INSTRUCTIONS
+============================================================
+
+HOW YOU WORK:
+You have ONE tool: execute_python. It runs Python code in a sandbox.
+Inside that sandbox, an `actions` object is pre-loaded with methods you call to perform operations (look up records, search options, modify state, etc.).
+The full list of available actions and their parameters is documented in the execute_python tool description — READ IT before writing code.
+
+TURN STRUCTURE (STRICT):
+In each turn you must do EXACTLY ONE of the following — never both:
+ A) Send a text message to the user, OR
+ B) Make an execute_python call.
+You MUST NOT combine a message and a tool call in the same turn.
+In particular, if execute_python returned an error, your next turn must be ONLY an execute_python call with fixed code — no text, no apology, no explanation.
+
+CRITICAL — WRITE COMPLETE SCRIPTS:
+A single execute_python call can contain MANY actions.* calls chained together in one script.
+You MUST combine all related steps into ONE execute_python call.
+Do NOT make separate execute_python calls for each individual action.
+
+Think about what information you need, then write ONE script that gathers and processes ALL of it.
+Only make a second execute_python call if the first one fails or if you need user input before continuing.
+
+SANDBOX ENVIRONMENT:
+Your code runs in a restricted Python sandbox. These constraints apply:
+- ALLOWED IMPORTS: json, math, datetime, time, re, collections, itertools — nothing else
+- NO file I/O: open(), os, sys, subprocess, pathlib are all blocked
+- NO exec(), eval(), compile()
+- NO dunder attributes: __name__, __class__, __dict__ etc. are blocked
+- NO input(): extract information from the conversation instead
+- The `actions` object is ALREADY AVAILABLE — do not import it
+
+CRITICAL INPUT BAN (HIGHEST PRIORITY):
+- NEVER use input() in execute_python code.
+- NEVER use placeholder variables such as `input`, `Input`, or `user_input`.
+- NEVER write code like `email = input(...)` or `value = Input`.
+- If any value is missing, ask the user in a normal assistant message (not in Python code).
+- In execute_python code, only use values already present in conversation/tool outputs.
+- Any script using input() is invalid and must be rewritten before execution.
+
+CODE GENERATION RULES:
+
+1. Treat actions.* outputs by TYPE:
+ - If output is structured data (object/list), parse with json.loads() before field access.
+ - If output is a scalar (e.g., identifier/status string), use it directly.
+
+ Safe pattern:
+ import json
+ raw = actions.ANY_METHOD(args)
+ # parse when raw looks like structured JSON; otherwise use raw directly
+ data = json.loads(raw) if isinstance(raw, str) and raw.lstrip().startswith('[') else raw
+ # for JSON objects, use the same idea: parse when output is structured JSON text
+
+ ❌ WRONG — accessing fields on the raw JSON string:
+ result = actions.some_write_method(...)
+ print(result['status']) # CRASH: result is a STRING, not a dict
+
+ ✅ CORRECT — parse first, then access:
+ result = json.loads(actions.some_write_method(...))
+ print(result['status'])
+
+2. ALWAYS print results — print() is the ONLY way to see output:
+ print(data)
+
+3. DICT vs LIST — read the signature line for each action:
+ Many actions return DICTS (not lists). Iterating a dict gives you KEYS (strings), not objects.
+
+ ❌ WRONG — treats dict as list of objects:
+ for item in data:
+ print(item['name']) # CRASH: item is a string key, not a dict
+
+ ✅ CORRECT — use .items() for dicts:
+ for key, value in data.items():
+ print(key, value)
+
+ When unsure, print the data first: print(type(data), data)
+
+4. STATELESS: Variables do NOT persist between execute_python calls.
+ Put ALL steps in ONE script.
+
+5. NEVER fabricate identifiers or option values.
+ Extract concrete values from tool outputs and reuse them exactly.
+ Never pass placeholder tokens like "user_id", "order_id", "item_id", "payment_method_id"
+ as actual values. Those are parameter NAMES, not real values.
+
+6. PRE-FLIGHT CHECKLIST before any state-changing action:
+ Before calling any write action, verify all required arguments come from current
+ data in THIS script and satisfy preconditions.
+
+ Generic checks:
+ - Every argument variable is defined before use (no undefined names).
+ - No input()/Input/user_input usage anywhere in the script.
+ - Entities referenced by the action are confirmed to exist in retrieved data.
+ - Current state allows the action (e.g., status/eligibility/business constraints).
+ - Argument values are concrete runtime values (not placeholder strings).
+
+ Pattern:
+ # 1) Read current state
+ state = json.loads(actions.some_read_method(...))
+ print(state)
+
+ # 2) Validate preconditions and required values
+ # (extract concrete values from state; do not invent placeholders)
+
+ # 3) Execute write action only after checks
+ result = json.loads(actions.some_write_method(...))
+ print(result)
+
+7. LOOKUP + EXECUTION GUARDRAILS (GENERIC):
+ A) LOOKUP FALLBACK (max 2 tries):
+ - Try one primary lookup strategy.
+ - If it fails, try one alternative strategy.
+ - If both fail, DO NOT guess values and DO NOT run write actions.
+ Ask the user for the missing value(s) in a normal assistant message.
+
+ B) NEVER USE PLACEHOLDER LITERALS AS REAL VALUES:
+ Parameter names are not runtime values.
+ Forbidden as actual values: "user_id", "order_id", "item_id", "product_id",
+ "payment_method_id", "email", "first_name", "last_name", "zip", etc.
+
+ C) READ-THEN-WRITE DISCIPLINE:
+ Before any write action, explicitly read current state and confirm:
+ - target entity exists
+ - operation is allowed in current state
+ - selected replacement/option/value is valid in current data
+
+ D) ARGUMENT SANITY CHECKS BEFORE WRITE:
+ For each required argument:
+ - it is defined in this script
+ - it is non-empty and concrete (not placeholder text)
+ - it was derived from conversation/tool output
+ Print chosen argument values before the write call.
+
+ E) TWO-PHASE SCRIPT TEMPLATE:
+ - Phase 1: gather and print all required facts
+ - Phase 2: execute write action only if checks pass
+
+8. MANDATORY PRE-EXECUTION SELF-CHECK (RUN THIS MENTALLY BEFORE EVERY execute_python CALL):
+ - No `input()`, `input`, `Input`, or `user_input` anywhere in code.
+ - No placeholder literals used as real values (`"user_id"`, `"order_id"`, etc.).
+ - Every variable used in arguments is defined in THIS script.
+ - For every critical value, print it before write actions.
+ - If a lookup failed twice, DO NOT continue with write actions; ask user for corrected value.
+
+9. STRICT LOOKUP FAILURE RULE:
+ If authentication/lookup fails (target entity not found), you MUST stop mutation flow.
+ Do not call write actions after unresolved lookup failure.
+ Your next action is either:
+ - one alternative lookup attempt, OR
+ - ask the user for corrected identifying information.
+
+SILENT ERROR RECOVERY (MANDATORY):
+When execute_python returns an error, you MUST:
+1. NOT produce any text message — no apology, no explanation, no "let me try again"
+2. Immediately make another execute_python call with corrected code
+3. The user must NEVER know an error occurred
+
+This means your very next turn after an error is ONLY a tool call — zero words to the user.
+
+❌ WRONG: "I apologize for the error. Let me try a different approach." + execute_python
+❌ WRONG: "Let me fix that." + execute_python
+❌ WRONG: Any text at all before or alongside the retry
+✅ CORRECT: execute_python (with fixed code, no accompanying text)
+
+Only speak to the user when you have a successful result or need information from them.
+If after 3 retries you still cannot complete the task, you may say there is a temporary technical issue — but never mention implementation details.
+
+ANTI-LOOP GUARD (GENERIC):
+- If you hit the same error class twice in a row (e.g., repeated "not found" or repeated type/index error),
+ stop retrying variations and switch strategy:
+ 1) one alternative lookup/validation path, then
+ 2) ask user for the missing/corrected value if still unresolved.
+- Do not burn steps by repeating near-identical failing scripts.
+
+============================================================
+SECTION 2 — USE-CASE SPECIFIC EXAMPLES (BANKING)
+============================================================
+
+CRITICAL — WRITE COMPLETE SCRIPTS EXAMPLE:
+
+❌ WRONG (multiple execute_python calls, one action each):
+ Turn 1: execute_python → actions.find_customer_id_by_email("emma.reed@examplebank.com")
+ Turn 2: execute_python → actions.list_customer_cards("cust_emma_reed_102")
+ Turn 3: execute_python → actions.update_card_limits("cust_emma_reed_102", "card_emma_debit_4821", atm_withdrawal_limit=1200)
+
+✅ CORRECT (one execute_python call with a complete script):
+ execute_python →
+ import json
+ customer_id = actions.find_customer_id_by_email("emma.reed@examplebank.com")
+ print(f"Customer ID: {customer_id}")
+ profile = json.loads(actions.get_customer_profile(customer_id))
+ print(profile)
+ cards = json.loads(actions.list_customer_cards(customer_id))
+ print(cards)
+
+BANKING STATE TRUTH RULE (MANDATORY):
+- Never report card status, current limits, or customer profile details from memory.
+- Every factual state claim must come from tool output retrieved in the SAME execute_python script.
+- Before replying about card state, re-read with `get_customer_profile`, `list_customer_cards`, and/or `get_card_details`.
+
+BANKING IDENTIFIER DISCIPLINE:
+- Never invent customer IDs, card IDs, last-four values, statuses, or limits.
+- Start banking workflows with customer identification by `find_customer_id_by_email` or `find_customer_id_by_name_dob`.
+- If the user mentions only the last four digits, first authenticate the customer, then use `find_card_id_by_last_four`.
+- Before any mutation, verify the selected card belongs to the authenticated customer.
+
+BANKING API NAME DISCIPLINE (MANDATORY):
+- Use only banking actions listed in the current API REFERENCE.
+- Allowed read methods:
+ `actions.find_customer_id_by_email`, `actions.find_customer_id_by_name_dob`,
+ `actions.get_customer_profile`, `actions.list_customer_cards`,
+ `actions.find_card_id_by_last_four`, `actions.get_card_details`.
+- Allowed write methods:
+ `actions.update_card_limits`, `actions.freeze_card`, `actions.unfreeze_card`, `actions.block_card`.
+- Allowed utility methods:
+ `actions.calculate`, `actions.transfer_to_human_agents`.
+- If a method name is not in this allowlist or current API REFERENCE, do not call it.
+
+FIRST AUTH TURN TEMPLATE (BANKING):
+- After the user provides email, the first auth script must use:
+ 1) `customer_id = actions.find_customer_id_by_email(email)`
+ 2) `profile = json.loads(actions.get_customer_profile(customer_id))`
+ 3) `cards = json.loads(actions.list_customer_cards(customer_id))`
+ 4) print all three outputs
+- If the user provides name + date of birth instead, use `find_customer_id_by_name_dob` first, then the same read steps.
+
+AUTHENTICATION GATE (MANDATORY):
+- Never claim the user is authenticated before BOTH are true:
+ 1) the user explicitly provided email OR full name + date of birth, and
+ 2) the corresponding lookup succeeded in execute_python.
+- If authentication data is missing, ask for it and do NOT run banking tools.
+- If lookup fails, ask for corrected identifying information.
+- Do not reveal card details or current limits until authentication succeeds.
+- In the first successful auth response, include the exact authenticated `customer_id` from tool output.
+
+MUTATION EXECUTION CONTRACT (MANDATORY):
+- For all mutations (`update_card_limits`, `freeze_card`, `unfreeze_card`, `block_card`):
+ 1) first read and print current card state,
+ 2) summarize the exact requested change,
+ 3) ask for explicit confirmation,
+ 4) after the user confirms ("yes"), your NEXT turn must be an `execute_python` call that performs the mutation.
+- Never claim a mutation succeeded unless the tool output in that turn shows the updated card state.
+
+LIMIT CHANGE PRECHECKS (MANDATORY):
+Before `update_card_limits`, gather and print:
+1) authenticated `customer_id`
+2) chosen `card_id` and card `status`
+3) current `limits`
+4) `limit_bounds`
+5) the exact new limit values requested by the user
+
+If any requested value is outside the allowed bounds, do not call the write tool. Tell the user the allowed range instead.
+
+CARD STATUS RULES (MANDATORY):
+- `freeze_card` is only for cards currently in `active` status.
+- `unfreeze_card` is only for cards currently in `frozen` status.
+- `block_card` is permanent and allowed reasons are only: `lost`, `stolen`, `suspected_fraud`, `customer_request`.
+- Blocked cards cannot be unfrozen or have limits changed.
+- If the user is unsure whether they want a temporary or permanent action, explain the difference and ask them to choose before any write call.
+
+POST-WRITE VERIFICATION (MANDATORY):
+- After any successful write, print and inspect the returned card object.
+- Use the returned object as the source of truth for the final user-facing message.
+- Mention the final `status` and updated `limits` when relevant.
+
+TRANSFER TO HUMAN AGENT:
+To transfer, make an execute_python call with code: actions.transfer_to_human_agents("summary of the issue"), then send the message 'YOU ARE BEING TRANSFERRED TO A HUMAN AGENT. PLEASE HOLD ON.' to the user.
+
+
+# Banking card management agent policy
+
+As a banking card-management agent, you can help users:
+
+- **authenticate their banking profile**
+- **review their own cards and current card-control settings**
+- **change supported daily card limits**
+- **temporarily freeze and unfreeze cards**
+- **permanently block cards for approved reasons**
+
+At the beginning of the conversation, you must authenticate the customer by locating their customer id via email, or via first name + last name + date of birth. This must be done even when the user already provides a customer id or card id.
+
+Once the customer has been authenticated, you can help only that one customer for the rest of the conversation. You must deny requests involving any other customer.
+
+Before taking any action that updates the database (limit change, freeze, unfreeze, block), you must list the action details and obtain explicit user confirmation (yes) to proceed.
+
+You should not make up any information or procedures not provided by the user or the tools, and you should not give subjective financial or security advice.
+
+You should at most make one tool call at a time, and if you take a tool call, you should not respond to the user at the same time. If you respond to the user, you should not make a tool call at the same time.
+
+You should deny user requests that are against this policy.
+
+You should transfer the user to a human agent if and only if the request cannot be handled within the scope of your actions. To transfer, first make a tool call to `transfer_to_human_agents`, and then send the message 'YOU ARE BEING TRANSFERRED TO A HUMAN AGENT. PLEASE HOLD ON.' to the user.
+
+## Domain basics
+
+- All timestamps in the database are UTC and ISO 8601 based. For example `2026-03-20T09:00:00Z` means 9:00 AM UTC.
+
+### Customer
+
+Each customer has a profile containing:
+
+- unique customer id
+- full name
+- email
+- date of birth
+- registered address
+- registered phone last four digits
+- list of owned card ids
+
+### Card
+
+Each card has the following attributes:
+
+- unique card id
+- customer id
+- nickname
+- product name
+- card type (`debit` or `credit`)
+- network
+- linked account id
+- last four digits
+- expiry month and year
+- current status
+- adjustable daily limits
+- allowed minimum and maximum limit values
+- audit event history
+
+A card status can be:
+
+- **active** — card can be used normally
+- **frozen** — temporary lock; the card may be unfrozen later
+- **blocked** — permanent lock; the card cannot be unfrozen or edited later
+
+## Generic action rules
+
+- Always authenticate the customer before revealing card details or changing card state.
+- Always read current card state before any write action.
+- Always verify that the card belongs to the authenticated customer.
+- If the user references a card only by last four digits, authenticate first and then use `find_card_id_by_last_four`.
+
+## Change card limits
+
+A card limit change can only be performed when the card status is `active` or `frozen`.
+
+You may change one or more of these limits only:
+
+- `atm_withdrawal_limit`
+- `pos_purchase_limit`
+- `ecommerce_purchase_limit`
+- `contactless_purchase_limit`
+
+Before the change, you must check the card's current `limit_bounds`. If a requested value falls outside the allowed range, you must not call the write tool.
+
+After user confirmation, the tool updates the limits immediately.
+
+## Freeze card
+
+A card can only be frozen if its status is `active`.
+
+The user must provide a reason for the temporary freeze.
+
+After user confirmation, the card status changes to `frozen` and the temporary block reason is stored.
+
+## Unfreeze card
+
+A card can only be unfrozen if its status is `frozen`.
+
+After user confirmation, the card status changes back to `active`.
+
+## Block card
+
+A card can be permanently blocked only for one of these reasons:
+
+- `lost`
+- `stolen`
+- `suspected_fraud`
+- `customer_request`
+
+A blocked card stays blocked. It cannot be unfrozen and its limits cannot be changed.
+
+If the user only wants a temporary lock, you should use the freeze flow instead of the block flow.
+
+## Out of scope
+
+You must not handle requests for:
+
+- PIN resets
+- card replacement orders
+- transaction disputes or chargebacks
+- balance transfers
+- statement questions
+- account closure
+
+For those requests, transfer to a human agent.
+
diff --git a/sample_solutions/AgenticCodeExecution/examples/banking/banking_data_model.py b/sample_solutions/AgenticCodeExecution/examples/banking/banking_data_model.py
new file mode 100644
index 00000000..4ca08b26
--- /dev/null
+++ b/sample_solutions/AgenticCodeExecution/examples/banking/banking_data_model.py
@@ -0,0 +1,142 @@
+"""Data models for the banking card-management domain."""
+
+from __future__ import annotations
+
+import json
+from pathlib import Path
+from typing import Any, Dict, List, Literal, Optional
+
+from pydantic import BaseModel, Field
+
+
+CardStatus = Literal["active", "frozen", "blocked"]
+CardType = Literal["debit", "credit"]
+BlockReason = Literal["lost", "stolen", "suspected_fraud", "customer_request"]
+
+
+class CustomerName(BaseModel):
+ """Customer full name."""
+
+ first_name: str = Field(description="First name")
+ last_name: str = Field(description="Last name")
+
+
+class CustomerAddress(BaseModel):
+ """Customer mailing address."""
+
+ address1: str = Field(description="First line of the address")
+ address2: str = Field(description="Second line of the address")
+ city: str = Field(description="City")
+ state: str = Field(description="State")
+ country: str = Field(description="Country")
+ zip: str = Field(description="ZIP / postal code")
+
+
+class CardLimits(BaseModel):
+ """Daily card control limits."""
+
+ atm_withdrawal_limit: int = Field(description="Daily ATM withdrawal limit in USD")
+ pos_purchase_limit: int = Field(description="Daily in-person purchase limit in USD")
+ ecommerce_purchase_limit: int = Field(description="Daily e-commerce purchase limit in USD")
+ contactless_purchase_limit: int = Field(description="Daily contactless purchase limit in USD")
+
+
+class CardLimitBounds(BaseModel):
+ """Allowed minimum and maximum values for each adjustable limit."""
+
+ minimum: CardLimits = Field(description="Minimum allowed values")
+ maximum: CardLimits = Field(description="Maximum allowed values")
+
+
+class CardEvent(BaseModel):
+ """Audit record for a card state change."""
+
+ timestamp: str = Field(description="Event timestamp in ISO-8601 UTC format")
+ action: str = Field(description="Action that occurred")
+ details: Dict[str, Any] = Field(description="Structured details for the action")
+
+
+class Card(BaseModel):
+ """Bank card record with controls and state."""
+
+ card_id: str = Field(description="Unique card identifier")
+ customer_id: str = Field(description="Owning customer identifier")
+ nickname: str = Field(description="Customer-visible nickname for the card")
+ product_name: str = Field(description="Card product name")
+ card_type: CardType = Field(description="Card type")
+ network: str = Field(description="Card network, such as Visa or Mastercard")
+ linked_account: str = Field(description="Linked deposit or credit account identifier")
+ last_four: str = Field(description="Last four digits of the card")
+ expiry_month: int = Field(description="Expiry month")
+ expiry_year: int = Field(description="Expiry year")
+ status: CardStatus = Field(description="Current card status")
+ limits: CardLimits = Field(description="Current active card limits")
+ limit_bounds: CardLimitBounds = Field(description="Allowed min/max limit values")
+ temporary_block_reason: Optional[str] = Field(
+ default=None,
+ description="Temporary freeze reason, if the card is frozen",
+ )
+ block_reason: Optional[BlockReason] = Field(
+ default=None,
+ description="Permanent block reason, if the card is blocked",
+ )
+ events: List[CardEvent] = Field(
+ default_factory=list,
+ description="Audit history for this card",
+ )
+
+
+class Customer(BaseModel):
+ """Bank customer profile."""
+
+ customer_id: str = Field(description="Unique customer identifier")
+ name: CustomerName = Field(description="Customer name")
+ email: str = Field(description="Customer email address")
+ date_of_birth: str = Field(description="Date of birth in YYYY-MM-DD format")
+ phone_last_four: str = Field(description="Last four digits of the registered phone number")
+ address: CustomerAddress = Field(description="Primary mailing address")
+ cards: List[str] = Field(description="List of card ids owned by the customer")
+
+
+class BankingDB(BaseModel):
+ """Database containing banking card-management data."""
+
+ model_config = {"extra": "allow"}
+
+ customers: Dict[str, Customer] = Field(
+ description="Dictionary of customers indexed by customer id"
+ )
+ cards: Dict[str, Card] = Field(
+ description="Dictionary of cards indexed by card id"
+ )
+ meta: Dict[str, Any] = Field(
+ default_factory=dict,
+ description="Database metadata",
+ )
+
+ _db_path: str = ""
+
+ @classmethod
+ def load(cls, path: str | Path) -> "BankingDB":
+ """Load the database from a JSON file."""
+ with open(path, "r") as f:
+ data = json.load(f)
+ db = cls.model_validate(data)
+ db._db_path = str(path)
+ return db
+
+ def save(self) -> None:
+ """Save the database back to the JSON file."""
+ if self._db_path:
+ with open(self._db_path, "w") as f:
+ json.dump(self.model_dump(exclude={"_db_path"}), f, indent=2)
+ print(f"Database saved to {self._db_path}")
+
+ def get_statistics(self) -> Dict[str, Any]:
+ """Get a summary of the database contents."""
+ return {
+ "num_customers": len(self.customers),
+ "num_cards": len(self.cards),
+ "num_blocked_cards": sum(1 for card in self.cards.values() if card.status == "blocked"),
+ "num_frozen_cards": sum(1 for card in self.cards.values() if card.status == "frozen"),
+ }
diff --git a/sample_solutions/AgenticCodeExecution/examples/banking/data/db.json b/sample_solutions/AgenticCodeExecution/examples/banking/data/db.json
new file mode 100644
index 00000000..ef22a2f3
--- /dev/null
+++ b/sample_solutions/AgenticCodeExecution/examples/banking/data/db.json
@@ -0,0 +1,310 @@
+{
+ "customers": {
+ "cust_emma_reed_102": {
+ "customer_id": "cust_emma_reed_102",
+ "name": {
+ "first_name": "Emma",
+ "last_name": "Reed"
+ },
+ "email": "emma.reed@examplebank.com",
+ "date_of_birth": "1988-04-12",
+ "phone_last_four": "1842",
+ "address": {
+ "address1": "14 Cedar Lane",
+ "address2": "Apt 5B",
+ "city": "Boston",
+ "state": "MA",
+ "country": "USA",
+ "zip": "02116"
+ },
+ "cards": [
+ "card_emma_debit_4821",
+ "card_emma_credit_7719"
+ ]
+ },
+ "cust_marco_silva_204": {
+ "customer_id": "cust_marco_silva_204",
+ "name": {
+ "first_name": "Marco",
+ "last_name": "Silva"
+ },
+ "email": "marco.silva@examplebank.com",
+ "date_of_birth": "1979-11-03",
+ "phone_last_four": "9044",
+ "address": {
+ "address1": "88 Harbor Street",
+ "address2": "",
+ "city": "Miami",
+ "state": "FL",
+ "country": "USA",
+ "zip": "33131"
+ },
+ "cards": [
+ "card_marco_credit_1147"
+ ]
+ },
+ "cust_priya_natarajan_318": {
+ "customer_id": "cust_priya_natarajan_318",
+ "name": {
+ "first_name": "Priya",
+ "last_name": "Natarajan"
+ },
+ "email": "priya.natarajan@examplebank.com",
+ "date_of_birth": "1992-08-27",
+ "phone_last_four": "6610",
+ "address": {
+ "address1": "501 Westfield Drive",
+ "address2": "Unit 12",
+ "city": "Seattle",
+ "state": "WA",
+ "country": "USA",
+ "zip": "98109"
+ },
+ "cards": [
+ "card_priya_debit_9033",
+ "card_priya_travel_5520"
+ ]
+ }
+ },
+ "cards": {
+ "card_emma_debit_4821": {
+ "card_id": "card_emma_debit_4821",
+ "customer_id": "cust_emma_reed_102",
+ "nickname": "Daily debit",
+ "product_name": "Everyday Checking Debit",
+ "card_type": "debit",
+ "network": "Visa",
+ "linked_account": "CHK-100214",
+ "last_four": "4821",
+ "expiry_month": 8,
+ "expiry_year": 2028,
+ "status": "active",
+ "limits": {
+ "atm_withdrawal_limit": 800,
+ "pos_purchase_limit": 3000,
+ "ecommerce_purchase_limit": 1500,
+ "contactless_purchase_limit": 200
+ },
+ "limit_bounds": {
+ "minimum": {
+ "atm_withdrawal_limit": 100,
+ "pos_purchase_limit": 500,
+ "ecommerce_purchase_limit": 200,
+ "contactless_purchase_limit": 50
+ },
+ "maximum": {
+ "atm_withdrawal_limit": 2000,
+ "pos_purchase_limit": 7000,
+ "ecommerce_purchase_limit": 4000,
+ "contactless_purchase_limit": 500
+ }
+ },
+ "temporary_block_reason": null,
+ "block_reason": null,
+ "events": [
+ {
+ "timestamp": "2025-01-15T14:20:00Z",
+ "action": "card_issued",
+ "details": {
+ "channel": "mobile_app"
+ }
+ }
+ ]
+ },
+ "card_emma_credit_7719": {
+ "card_id": "card_emma_credit_7719",
+ "customer_id": "cust_emma_reed_102",
+ "nickname": "Travel rewards",
+ "product_name": "Premium Rewards Credit",
+ "card_type": "credit",
+ "network": "Mastercard",
+ "linked_account": "CRD-550812",
+ "last_four": "7719",
+ "expiry_month": 11,
+ "expiry_year": 2029,
+ "status": "frozen",
+ "limits": {
+ "atm_withdrawal_limit": 1200,
+ "pos_purchase_limit": 6000,
+ "ecommerce_purchase_limit": 3500,
+ "contactless_purchase_limit": 300
+ },
+ "limit_bounds": {
+ "minimum": {
+ "atm_withdrawal_limit": 200,
+ "pos_purchase_limit": 1000,
+ "ecommerce_purchase_limit": 500,
+ "contactless_purchase_limit": 100
+ },
+ "maximum": {
+ "atm_withdrawal_limit": 2500,
+ "pos_purchase_limit": 10000,
+ "ecommerce_purchase_limit": 6000,
+ "contactless_purchase_limit": 800
+ }
+ },
+ "temporary_block_reason": "Customer requested a temporary lock while traveling",
+ "block_reason": null,
+ "events": [
+ {
+ "timestamp": "2024-09-10T09:15:00Z",
+ "action": "card_issued",
+ "details": {
+ "channel": "branch"
+ }
+ },
+ {
+ "timestamp": "2026-03-08T18:42:00Z",
+ "action": "card_frozen",
+ "details": {
+ "reason": "Customer requested a temporary lock while traveling"
+ }
+ }
+ ]
+ },
+ "card_marco_credit_1147": {
+ "card_id": "card_marco_credit_1147",
+ "customer_id": "cust_marco_silva_204",
+ "nickname": "Main credit",
+ "product_name": "Platinum Cashback Credit",
+ "card_type": "credit",
+ "network": "Visa",
+ "linked_account": "CRD-661904",
+ "last_four": "1147",
+ "expiry_month": 5,
+ "expiry_year": 2027,
+ "status": "blocked",
+ "limits": {
+ "atm_withdrawal_limit": 500,
+ "pos_purchase_limit": 4500,
+ "ecommerce_purchase_limit": 2500,
+ "contactless_purchase_limit": 250
+ },
+ "limit_bounds": {
+ "minimum": {
+ "atm_withdrawal_limit": 100,
+ "pos_purchase_limit": 1000,
+ "ecommerce_purchase_limit": 300,
+ "contactless_purchase_limit": 50
+ },
+ "maximum": {
+ "atm_withdrawal_limit": 1500,
+ "pos_purchase_limit": 8000,
+ "ecommerce_purchase_limit": 4000,
+ "contactless_purchase_limit": 500
+ }
+ },
+ "temporary_block_reason": null,
+ "block_reason": "suspected_fraud",
+ "events": [
+ {
+ "timestamp": "2023-06-21T11:00:00Z",
+ "action": "card_issued",
+ "details": {
+ "channel": "branch"
+ }
+ },
+ {
+ "timestamp": "2026-02-28T07:31:00Z",
+ "action": "card_blocked",
+ "details": {
+ "reason": "suspected_fraud"
+ }
+ }
+ ]
+ },
+ "card_priya_debit_9033": {
+ "card_id": "card_priya_debit_9033",
+ "customer_id": "cust_priya_natarajan_318",
+ "nickname": "Household debit",
+ "product_name": "Smart Checking Debit",
+ "card_type": "debit",
+ "network": "Visa",
+ "linked_account": "CHK-204990",
+ "last_four": "9033",
+ "expiry_month": 2,
+ "expiry_year": 2029,
+ "status": "active",
+ "limits": {
+ "atm_withdrawal_limit": 1000,
+ "pos_purchase_limit": 2800,
+ "ecommerce_purchase_limit": 1800,
+ "contactless_purchase_limit": 250
+ },
+ "limit_bounds": {
+ "minimum": {
+ "atm_withdrawal_limit": 100,
+ "pos_purchase_limit": 500,
+ "ecommerce_purchase_limit": 200,
+ "contactless_purchase_limit": 50
+ },
+ "maximum": {
+ "atm_withdrawal_limit": 2200,
+ "pos_purchase_limit": 6000,
+ "ecommerce_purchase_limit": 4500,
+ "contactless_purchase_limit": 600
+ }
+ },
+ "temporary_block_reason": null,
+ "block_reason": null,
+ "events": [
+ {
+ "timestamp": "2025-05-02T16:05:00Z",
+ "action": "card_issued",
+ "details": {
+ "channel": "mobile_app"
+ }
+ }
+ ]
+ },
+ "card_priya_travel_5520": {
+ "card_id": "card_priya_travel_5520",
+ "customer_id": "cust_priya_natarajan_318",
+ "nickname": "Travel card",
+ "product_name": "Explorer Travel Credit",
+ "card_type": "credit",
+ "network": "Mastercard",
+ "linked_account": "CRD-909221",
+ "last_four": "5520",
+ "expiry_month": 9,
+ "expiry_year": 2030,
+ "status": "active",
+ "limits": {
+ "atm_withdrawal_limit": 1500,
+ "pos_purchase_limit": 7500,
+ "ecommerce_purchase_limit": 5000,
+ "contactless_purchase_limit": 400
+ },
+ "limit_bounds": {
+ "minimum": {
+ "atm_withdrawal_limit": 200,
+ "pos_purchase_limit": 1500,
+ "ecommerce_purchase_limit": 500,
+ "contactless_purchase_limit": 100
+ },
+ "maximum": {
+ "atm_withdrawal_limit": 3000,
+ "pos_purchase_limit": 12000,
+ "ecommerce_purchase_limit": 8000,
+ "contactless_purchase_limit": 1000
+ }
+ },
+ "temporary_block_reason": null,
+ "block_reason": null,
+ "events": [
+ {
+ "timestamp": "2024-12-19T10:45:00Z",
+ "action": "card_issued",
+ "details": {
+ "channel": "branch"
+ }
+ }
+ ]
+ }
+ },
+ "meta": {
+ "bank_name": "Example Bank",
+ "currency": "USD",
+ "updated_at": "2026-03-20T09:00:00Z"
+ }
+}
diff --git a/sample_solutions/AgenticCodeExecution/examples/banking/mcp_banking_server.py b/sample_solutions/AgenticCodeExecution/examples/banking/mcp_banking_server.py
new file mode 100644
index 00000000..4b103fc3
--- /dev/null
+++ b/sample_solutions/AgenticCodeExecution/examples/banking/mcp_banking_server.py
@@ -0,0 +1,561 @@
+#!/usr/bin/env python3
+"""
+MCP Server for Banking Card-Management Tools - Fully Standalone
+
+All business logic is directly in the MCP tools - no intermediate wrapper classes.
+"""
+
+import argparse
+import json
+import os
+import sys
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any, Dict, Optional, cast
+
+from fastmcp import FastMCP
+
+# Add parent directory to sys.path for shared modules (error_hints)
+sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
+
+from banking_data_model import (
+ BankingDB,
+ BlockReason,
+ Card,
+ CardEvent,
+ CardLimitBounds,
+ CardLimits,
+ Customer,
+ CustomerAddress,
+ CustomerName,
+)
+from error_hints import analyze_execution_error
+
+
+DEFAULT_DB_PATH = str(Path(__file__).resolve().parent / "data" / "db.json")
+ALLOWED_BLOCK_REASONS = {"lost", "stolen", "suspected_fraud", "customer_request"}
+
+
+def ensure_db(db_path: str) -> None:
+ """Check that the banking database exists; exit with instructions if missing."""
+ if Path(db_path).exists():
+ return
+ print(f"\n❌ Database not found: {db_path}")
+ print(f" The banking database is included in the repository.")
+ print(f" Make sure the data/ directory is present (e.g. git checkout).")
+ sys.exit(1)
+
+
+mcp = FastMCP(
+ "Banking Tools Server",
+ instructions="""You are a banking card-management support agent. Use these tools to help customers with:
+- Authenticating their profile by email or name + date of birth
+- Reviewing their cards and current card-control limits
+- Changing supported daily card limits within allowed bounds
+- Freezing and unfreezing cards
+- Permanently blocking cards for lost/stolen/fraud/customer-request reasons
+
+Always verify the customer's identity before revealing card details or making changes. Ask for explicit confirmation before any mutation.""",
+)
+
+_db: Optional[BankingDB] = None
+_original_db_path: str = ""
+_session_dbs: Dict[str, BankingDB] = {}
+SESSION_DB_DIR = Path(__file__).resolve().parent.parent / "session_dbs"
+SESSION_DB_DIR.mkdir(exist_ok=True)
+
+
+def _now_iso() -> str:
+ return datetime.now(timezone.utc).replace(microsecond=0).isoformat().replace("+00:00", "Z")
+
+
+def _session_db_file(session_id: str) -> Path:
+ safe = "".join(ch for ch in session_id if ch.isalnum() or ch in ("-", "_"))
+ if not safe:
+ safe = "session"
+ return SESSION_DB_DIR / f"banking_{safe[:64]}.json"
+
+
+def get_db(session_id: str = "") -> BankingDB:
+ """Get the database for a given session."""
+ global _db, _original_db_path
+
+ if _db is None:
+ db_path = os.environ.get("BANKING_DB_PATH", DEFAULT_DB_PATH)
+ _original_db_path = db_path
+ _db = BankingDB.load(db_path)
+ _db._db_path = ""
+ print(f"Loaded template banking database from {db_path}")
+ print(f" - {len(_db.customers)} customers")
+ print(f" - {len(_db.cards)} cards")
+
+ if not session_id:
+ return _db
+
+ if session_id not in _session_dbs:
+ db = BankingDB.load(_original_db_path)
+ db._db_path = str(_session_db_file(session_id))
+ _session_dbs[session_id] = db
+ print(f"🆕 Created pristine banking DB for session {session_id[:8]}... ({len(_session_dbs)} active sessions)")
+
+ return _session_dbs[session_id]
+
+
+def _save_db(db: BankingDB) -> None:
+ db.meta["updated_at"] = _now_iso()
+ db.save()
+
+
+def _require_customer(db: BankingDB, customer_id: str) -> Customer:
+ customer = db.customers.get(customer_id)
+ if not customer:
+ raise ValueError("Customer not found")
+ return customer
+
+
+def _require_card(db: BankingDB, customer_id: str, card_id: str) -> Card:
+ customer = _require_customer(db, customer_id)
+ if card_id not in customer.cards:
+ raise ValueError("Card does not belong to customer")
+
+ card = db.cards.get(card_id)
+ if not card:
+ raise ValueError("Card not found")
+ return card
+
+
+def _append_event(card: Card, action: str, details: Dict[str, Any]) -> None:
+ card.events.append(
+ CardEvent(
+ timestamp=_now_iso(),
+ action=action,
+ details=details,
+ )
+ )
+
+
+def _card_summary(card: Card) -> Dict[str, Any]:
+ return {
+ "card_id": card.card_id,
+ "nickname": card.nickname,
+ "product_name": card.product_name,
+ "card_type": card.card_type,
+ "network": card.network,
+ "last_four": card.last_four,
+ "status": card.status,
+ "linked_account": card.linked_account,
+ "limits": card.limits.model_dump(),
+ "limit_bounds": card.limit_bounds.model_dump(),
+ "temporary_block_reason": card.temporary_block_reason,
+ "block_reason": card.block_reason,
+ }
+
+
+def _get_data_model_defs() -> Dict[str, dict]:
+ model_classes = [
+ CustomerName,
+ CustomerAddress,
+ CardLimits,
+ CardLimitBounds,
+ CardEvent,
+ Card,
+ Customer,
+ ]
+ defs: Dict[str, dict] = {}
+ for model_cls in model_classes:
+ schema = model_cls.model_json_schema(ref_template="#/$defs/{model}")
+ defs[model_cls.__name__] = {
+ "description": schema.get("description", ""),
+ "properties": schema.get("properties", {}),
+ }
+ return defs
+
+
+def _get_tool_metadata_payload() -> Dict[str, Any]:
+ ordered_actions = [
+ "calculate",
+ "find_customer_id_by_email",
+ "find_customer_id_by_name_dob",
+ "get_customer_profile",
+ "list_customer_cards",
+ "find_card_id_by_last_four",
+ "get_card_details",
+ "update_card_limits",
+ "freeze_card",
+ "unfreeze_card",
+ "block_card",
+ "transfer_to_human_agents",
+ ]
+
+ return {
+ "ordered_actions": ordered_actions,
+ "return_types": {
+ "calculate": "str",
+ "find_customer_id_by_email": "str",
+ "find_customer_id_by_name_dob": "str",
+ "get_customer_profile": "str (JSON)",
+ "list_customer_cards": "str (JSON)",
+ "find_card_id_by_last_four": "str",
+ "get_card_details": "str (JSON)",
+ "update_card_limits": "str (JSON)",
+ "freeze_card": "str (JSON)",
+ "unfreeze_card": "str (JSON)",
+ "block_card": "str (JSON)",
+ "transfer_to_human_agents": "str",
+ },
+ "semantic_types": {
+ "get_customer_profile": "Customer",
+ "list_customer_cards": "dict[card_id, CardSummary]",
+ "get_card_details": "Card",
+ "update_card_limits": "Card",
+ "freeze_card": "Card",
+ "unfreeze_card": "Card",
+ "block_card": "Card",
+ },
+ "data_model_defs": _get_data_model_defs(),
+ }
+
+
+@mcp.tool()
+def find_customer_id_by_email(email: str, session_id: str = "") -> str:
+ """Find customer id by email. Use this first to identify a customer.
+
+ Args:
+ email: Customer email such as 'emma.reed@examplebank.com'.
+
+ Returns:
+ The customer id if found.
+ """
+ db = get_db(session_id)
+ for customer_id, customer in db.customers.items():
+ if customer.email.lower() == email.lower():
+ return customer_id
+ raise ValueError("Customer not found")
+
+
+@mcp.tool()
+def find_customer_id_by_name_dob(first_name: str, last_name: str, date_of_birth: str, session_id: str = "") -> str:
+ """Find customer id by first name, last name, and date of birth.
+
+ Args:
+ first_name: Customer first name.
+ last_name: Customer last name.
+ date_of_birth: Date of birth in YYYY-MM-DD format.
+
+ Returns:
+ The customer id if found.
+ """
+ db = get_db(session_id)
+ for customer_id, customer in db.customers.items():
+ if (
+ customer.name.first_name.lower() == first_name.lower()
+ and customer.name.last_name.lower() == last_name.lower()
+ and customer.date_of_birth == date_of_birth
+ ):
+ return customer_id
+ raise ValueError("Customer not found")
+
+
+@mcp.tool()
+def get_customer_profile(customer_id: str, session_id: str = "") -> str:
+ """Get customer profile information.
+
+ Args:
+ customer_id: Authenticated customer id.
+
+ Returns:
+ A JSON STRING customer object.
+ """
+ db = get_db(session_id)
+ customer = _require_customer(db, customer_id)
+ return customer.model_dump_json(indent=2)
+
+
+@mcp.tool()
+def list_customer_cards(customer_id: str, session_id: str = "") -> str:
+ """List all cards for an authenticated customer.
+
+ Args:
+ customer_id: Authenticated customer id.
+
+ Returns:
+ A JSON STRING dictionary keyed by card_id.
+ """
+ db = get_db(session_id)
+ customer = _require_customer(db, customer_id)
+ payload = {
+ card_id: _card_summary(db.cards[card_id])
+ for card_id in customer.cards
+ if card_id in db.cards
+ }
+ return json.dumps(payload, indent=2)
+
+
+@mcp.tool()
+def find_card_id_by_last_four(customer_id: str, last_four: str, session_id: str = "") -> str:
+ """Find a customer's card id by the last four digits.
+
+ Args:
+ customer_id: Authenticated customer id.
+ last_four: Last four digits of the card.
+
+ Returns:
+ Matching card id.
+ """
+ db = get_db(session_id)
+ customer = _require_customer(db, customer_id)
+ for card_id in customer.cards:
+ card = db.cards.get(card_id)
+ if card and card.last_four == last_four:
+ return card_id
+ raise ValueError("Card not found")
+
+
+@mcp.tool()
+def get_card_details(customer_id: str, card_id: str, session_id: str = "") -> str:
+ """Get detailed information for one of the customer's cards.
+
+ Args:
+ customer_id: Authenticated customer id.
+ card_id: Card id to inspect.
+
+ Returns:
+ A JSON STRING card object.
+ """
+ db = get_db(session_id)
+ card = _require_card(db, customer_id, card_id)
+ return card.model_dump_json(indent=2)
+
+
+@mcp.tool()
+def calculate(expression: str, session_id: str = "") -> str:
+ """Calculate the result of a mathematical expression.
+
+ Args:
+ expression: Expression such as '2500 - 1500'.
+
+ Returns:
+ The calculated result as a string.
+ """
+ if not all(char in "0123456789+-*/(). " for char in expression):
+ raise ValueError("Invalid characters in expression")
+ return str(round(float(eval(expression, {"__builtins__": None}, {})), 6))
+
+
+@mcp.tool()
+def transfer_to_human_agents(summary: str, session_id: str = "") -> str:
+ """Transfer the customer to a human agent.
+
+ Returns:
+ Confirmation message.
+ """
+ return "Transfer successful"
+
+
+@mcp.tool()
+def get_execution_error_hint(error_msg: str, code: str = "", session_id: str = "") -> str:
+ """Return a recovery hint for sandbox execution/tool errors."""
+ return analyze_execution_error(error_msg=error_msg, code=code, domain="banking")
+
+
+@mcp.tool()
+def get_tool_metadata(session_id: str = "") -> str:
+ """Return metadata used to build execute_python action/data-model description."""
+ return json.dumps(_get_tool_metadata_payload())
+
+
+@mcp.tool()
+def update_card_limits(
+ customer_id: str,
+ card_id: str,
+ atm_withdrawal_limit: Optional[int] = None,
+ pos_purchase_limit: Optional[int] = None,
+ ecommerce_purchase_limit: Optional[int] = None,
+ contactless_purchase_limit: Optional[int] = None,
+ session_id: str = "",
+) -> str:
+ """Update one or more daily card limits within the allowed bounds.
+
+ Ask for explicit user confirmation before making changes.
+
+ Returns:
+ A JSON STRING card object.
+ """
+ db = get_db(session_id)
+ card = _require_card(db, customer_id, card_id)
+
+ if card.status == "blocked":
+ raise ValueError("Blocked cards cannot have limits changed")
+
+ requested_updates = {
+ "atm_withdrawal_limit": atm_withdrawal_limit,
+ "pos_purchase_limit": pos_purchase_limit,
+ "ecommerce_purchase_limit": ecommerce_purchase_limit,
+ "contactless_purchase_limit": contactless_purchase_limit,
+ }
+ changes = {key: value for key, value in requested_updates.items() if value is not None}
+ if not changes:
+ raise ValueError("At least one limit value must be provided")
+
+ old_limits = card.limits.model_dump()
+ new_limits = card.limits.model_dump()
+
+ for field_name, new_value in changes.items():
+ minimum = getattr(card.limit_bounds.minimum, field_name)
+ maximum = getattr(card.limit_bounds.maximum, field_name)
+ if int(new_value) < minimum or int(new_value) > maximum:
+ raise ValueError(
+ f"{field_name} must be between {minimum} and {maximum}"
+ )
+ setattr(card.limits, field_name, int(new_value))
+ new_limits[field_name] = int(new_value)
+
+ _append_event(
+ card,
+ "limits_updated",
+ {
+ "old_limits": old_limits,
+ "new_limits": new_limits,
+ },
+ )
+ _save_db(db)
+ return card.model_dump_json(indent=2)
+
+
+@mcp.tool()
+def freeze_card(customer_id: str, card_id: str, reason: str, session_id: str = "") -> str:
+ """Temporarily freeze a card.
+
+ Ask for explicit user confirmation before freezing the card.
+
+ Args:
+ reason: Non-empty customer-provided reason for the temporary freeze.
+
+ Returns:
+ A JSON STRING card object.
+ """
+ db = get_db(session_id)
+ card = _require_card(db, customer_id, card_id)
+
+ clean_reason = reason.strip()
+ if not clean_reason:
+ raise ValueError("Freeze reason is required")
+ if card.status == "blocked":
+ raise ValueError("Blocked cards cannot be frozen")
+ if card.status == "frozen":
+ raise ValueError("Card is already frozen")
+
+ card.status = "frozen"
+ card.temporary_block_reason = clean_reason
+ _append_event(
+ card,
+ "card_frozen",
+ {"reason": clean_reason},
+ )
+ _save_db(db)
+ return card.model_dump_json(indent=2)
+
+
+@mcp.tool()
+def unfreeze_card(customer_id: str, card_id: str, session_id: str = "") -> str:
+ """Remove a temporary freeze from a card.
+
+ Ask for explicit user confirmation before unfreezing the card.
+
+ Returns:
+ A JSON STRING card object.
+ """
+ db = get_db(session_id)
+ card = _require_card(db, customer_id, card_id)
+
+ if card.status != "frozen":
+ raise ValueError("Only frozen cards can be unfrozen")
+
+ previous_reason = card.temporary_block_reason
+ card.status = "active"
+ card.temporary_block_reason = None
+ _append_event(
+ card,
+ "card_unfrozen",
+ {"previous_reason": previous_reason},
+ )
+ _save_db(db)
+ return card.model_dump_json(indent=2)
+
+
+@mcp.tool()
+def block_card(customer_id: str, card_id: str, reason: str, session_id: str = "") -> str:
+ """Permanently block a card.
+
+ Ask for explicit user confirmation before blocking the card.
+
+ Args:
+ reason: One of 'lost', 'stolen', 'suspected_fraud', or 'customer_request'.
+
+ Returns:
+ A JSON STRING card object.
+ """
+ db = get_db(session_id)
+ card = _require_card(db, customer_id, card_id)
+
+ normalized_reason = reason.strip().lower().replace(" ", "_").replace("-", "_")
+ if normalized_reason not in ALLOWED_BLOCK_REASONS:
+ raise ValueError(
+ "Block reason must be one of: lost, stolen, suspected_fraud, customer_request"
+ )
+ if card.status == "blocked":
+ raise ValueError("Card is already blocked")
+
+ card.status = "blocked"
+ card.temporary_block_reason = None
+ card.block_reason = cast(BlockReason, normalized_reason)
+ _append_event(
+ card,
+ "card_blocked",
+ {"reason": normalized_reason},
+ )
+ _save_db(db)
+ return card.model_dump_json(indent=2)
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser(description="Banking MCP Server")
+ parser.add_argument(
+ "--db-path",
+ default=DEFAULT_DB_PATH,
+ help="Path to the banking database JSON file",
+ )
+ parser.add_argument(
+ "--port",
+ type=int,
+ default=5050,
+ help="Port to run the SSE server on",
+ )
+ parser.add_argument(
+ "--host",
+ default="0.0.0.0",
+ help="Host to bind to",
+ )
+ parser.add_argument(
+ "--transport",
+ choices=["sse", "stdio"],
+ default="sse",
+ help="Transport protocol to use",
+ )
+
+ args = parser.parse_args()
+ os.environ["BANKING_DB_PATH"] = args.db_path
+
+ ensure_db(args.db_path)
+ get_db()
+ print(" Original DB file is READ-ONLY (per-session copies used for mutations)")
+ print(f" Session DB dir: {SESSION_DB_DIR}")
+
+ print("\n🚀 Starting Banking MCP Server...")
+ print(f" Transport: {args.transport}")
+ if args.transport == "sse":
+ print(f" Host: {args.host}")
+ print(f" Port: {args.port}")
+ print(f" SSE endpoint: http://{args.host}:{args.port}/sse")
+
+ mcp.run(transport=args.transport, host=args.host, port=args.port)
diff --git a/sample_solutions/AgenticCodeExecution/examples/error_hints.py b/sample_solutions/AgenticCodeExecution/examples/error_hints.py
new file mode 100644
index 00000000..bea9ceeb
--- /dev/null
+++ b/sample_solutions/AgenticCodeExecution/examples/error_hints.py
@@ -0,0 +1,170 @@
+import re
+
+
+def analyze_execution_error(error_msg: str, code: str = "", domain: str = "generic") -> str:
+ """Analyze execution/tool errors and return a recovery hint string.
+
+ Domain-specific servers can call this with a domain label to customize messaging.
+ """
+ if not error_msg:
+ return ""
+
+ if "is not defined" in error_msg and "name '" in error_msg:
+ match = re.search(r"name '(\w+)' is not defined", error_msg)
+ var_name = match.group(1) if match else "variable"
+
+ if domain == "airline":
+ common_params = [
+ "first_name",
+ "last_name",
+ "email",
+ "user_id",
+ "flight_number",
+ "reservation_id",
+ "origin",
+ "destination",
+ ]
+ if var_name in common_params:
+ return (
+ f"ERROR: '{var_name}' is not defined. You must use a STRING LITERAL with "
+ "the actual value from the conversation, not an undefined variable. "
+ "For example, use \"John\" instead of first_name."
+ )
+
+ return (
+ f"REMINDER: The sandbox is STATELESS. '{var_name}' was not defined in this "
+ f"script. Variables from previous execute_python calls do NOT persist. "
+ f"You must define all variables in the SAME script. "
+ f"If '{var_name}' should be a value from the conversation, use a string literal."
+ )
+
+ if "'str' object has no attribute" in error_msg:
+ attr_match = re.search(r"'str' object has no attribute '(\w+)'", error_msg)
+ attr_name = attr_match.group(1) if attr_match else ""
+ return (
+ f"You are accessing '.{attr_name}' on a STRING. "
+ "The actions.* method likely returned a JSON string, not a Python object. "
+ "Parse it first with json.loads():\n"
+ " import json\n"
+ " result = json.loads(actions.some_method(arg))\n"
+ f" print(result['{attr_name}']) # Access as dict key, not attribute\n"
+ "Also check: iterating over a dict yields keys (strings), not objects. "
+ "Use `.items()` to get (key, value) pairs."
+ )
+
+ if "string indices must be integers" in error_msg:
+ if domain == "airline" and "db." in code:
+ return (
+ "You are trying to access a string as a dictionary. Common causes: "
+ "1) JSON string vs dict confusion, "
+ "2) Iterating `db.users` / `db.flights` / `db.reservations` directly (yielding keys)."
+ )
+
+ hint_parts = []
+
+ has_json_loads = "json.loads" in code
+ has_for_loop = re.search(r"for\s+(\w+)\s+in\s+(\w+)", code)
+
+ if has_json_loads and has_for_loop:
+ loop_var = has_for_loop.group(1)
+ iterable_var = has_for_loop.group(2)
+ key_access = re.search(rf"{re.escape(loop_var)}\[(['\"])\w+\1\]", code)
+ if key_access:
+ hint_parts.append(
+ f"You parsed the JSON correctly, but the result is a DICT (not a list of objects).\n"
+ f"When you write `for {loop_var} in {iterable_var}:`, Python iterates over the dict KEYS (strings).\n"
+ f"Then `{loop_var}['...']` fails because {loop_var} is a string like 'T-Shirt', not a dict.\n\n"
+ f"FIX: Use `.items()` to get key-value pairs:\n"
+ f" for key, value in {iterable_var}.items():\n"
+ f" print(key, value)\n\n"
+ f"Check the API REFERENCE for the exact return shape of each action."
+ )
+
+ if hint_parts:
+ return "\n".join(hint_parts)
+
+ return (
+ "You are trying to use string indexing (e.g., x['key']) on a STRING value. Common causes:\n"
+ "1) An actions.* method returned a JSON STRING instead of a dict/list. "
+ "Parse it first: `result = json.loads(actions.some_method(arg))`\n"
+ "2) The data is a DICT, not a list of objects. "
+ "Iterating over a dict yields keys (strings), not objects. "
+ "Use `.items()` to get (key, value) pairs, or `.values()` for values only.\n"
+ "3) A field contains ID strings, not objects. "
+ "Fetch the full object using the appropriate actions.* method.\n"
+ "Check the API REFERENCE Usage examples for the correct iteration pattern."
+ )
+
+ if "'dict' object has no attribute 'value'" in error_msg:
+ return "Dictionaries have a `.values()` method (plural), not `.value`. Did you mean `.values()`?"
+
+ if domain == "airline" and "name 'db' is not defined" in error_msg:
+ return "The `db` variable is available in the global scope. You do not need to import it."
+
+ if "missing 1 required positional argument: 'code'" in error_msg:
+ return 'The tool call is missing the \'code\' argument. Ensure your JSON tool call has {"code": "..."}.'
+
+ if "'builtin_function_or_method' object is not iterable" in error_msg:
+ return (
+ "You are trying to iterate over a method instead of calling it. "
+ "Check if you forgot parentheses: `.items()` not `.items`, "
+ "`.values()` not `.values`."
+ )
+
+ if "input() is not available" in error_msg:
+ return (
+ "You cannot use input() in the sandbox. "
+ "Extract the information from the conversation history instead. "
+ "If you don't have the information yet, send a message asking the user for it."
+ )
+
+ if "Import of" in error_msg and "not allowed" in error_msg:
+ return "External imports are not allowed in the sandbox. Use the provided 'actions' object and built-in modules (json, math, re)."
+
+ if "__name__" in error_msg or 'is an invalid attribute name because it starts with "_"' in error_msg:
+ return "Dunder attributes (like __name__) are blocked by sandbox security."
+
+ if "not found" in error_msg.lower() and "Error calling tool" in error_msg:
+ if domain == "airline":
+ if "User" in error_msg and "not found" in error_msg:
+ return "The user was not found. Make sure you are using the ACTUAL ID provided by the user."
+ if "Reservation" in error_msg and "not found" in error_msg:
+ return "The reservation was not found. Make sure you are using the ACTUAL reservation ID provided by the user."
+ if "Flight" in error_msg and "not found" in error_msg:
+ return "The flight was not found. Check the flight number and date."
+ return (
+ "A value was not found. Make sure you are using ACTUAL values retrieved from "
+ "previous tool calls or provided by the user, NOT placeholder or example values. "
+ "If you need an ID, look it up first using the appropriate query/lookup action."
+ )
+
+ if domain == "airline" and "'FlightDB' object has no attribute" in error_msg:
+ return (
+ "The `db` object is a container. It has NO search methods. "
+ "You must iterate over `db.users.values()`, `db.flights.values()`, "
+ "`db.reservations.values()` to find items."
+ )
+
+ if domain == "airline" and "'str' object has no attribute" in error_msg and "db." in code:
+ return (
+ "You might be iterating over a dictionary (for example, `for u in db.users:`). "
+ "This yields keys (strings). Use `.values()` to iterate over objects "
+ "(for example, `for u in db.users.values():`)."
+ )
+
+ if domain == "airline" and "'AirlineTools' object has no attribute" in error_msg:
+ return (
+ "You are trying to call a method that does not exist on the `actions` object. "
+ "Please check the list of AVAILABLE ACTIONS in the system prompt."
+ )
+
+ if "object has no attribute" in error_msg and "actions" in code.lower():
+ return (
+ "You called a method that does not exist. "
+ "Check the API REFERENCE in the system prompt for available actions."
+ )
+
+ if domain == "retail" and "#" in code and "Order not found" in error_msg:
+ return "Use the exact order ID returned by tools (including '#'). If user omitted '#', normalize before lookup."
+
+ return ""
\ No newline at end of file
diff --git a/sample_solutions/AgenticCodeExecution/examples/requirements.txt b/sample_solutions/AgenticCodeExecution/examples/requirements.txt
new file mode 100644
index 00000000..1fe7e50a
--- /dev/null
+++ b/sample_solutions/AgenticCodeExecution/examples/requirements.txt
@@ -0,0 +1,4 @@
+fastmcp>=2.0.0
+pydantic>=2.0.0
+uvicorn
+starlette
diff --git a/sample_solutions/AgenticCodeExecution/examples/retail/data/.gitkeep b/sample_solutions/AgenticCodeExecution/examples/retail/data/.gitkeep
new file mode 100644
index 00000000..e69de29b
diff --git a/sample_solutions/AgenticCodeExecution/examples/retail/mcp_retail_server.py b/sample_solutions/AgenticCodeExecution/examples/retail/mcp_retail_server.py
new file mode 100644
index 00000000..68137b5d
--- /dev/null
+++ b/sample_solutions/AgenticCodeExecution/examples/retail/mcp_retail_server.py
@@ -0,0 +1,922 @@
+#!/usr/bin/env python3
+"""
+MCP Server for Retail Tools - Fully Standalone
+
+All business logic is directly in the MCP tools - no intermediate wrapper classes.
+"""
+
+import argparse
+import json
+import os
+import sys
+import urllib.request
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+
+from fastmcp import FastMCP
+
+# Add parent directory to sys.path for shared modules (error_hints)
+sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
+
+from retail_data_model import (
+ RetailDB,
+ Order,
+ OrderItem,
+ UserName,
+ User,
+ Product,
+ Variant,
+ CreditCard,
+ PaypalAccount,
+ GiftCard,
+ OrderPayment,
+ OrderFullfilment,
+ UserAddress,
+)
+from error_hints import analyze_execution_error
+
+
+# Default DB path (sibling data/ directory)
+DEFAULT_DB_PATH = str(Path(__file__).resolve().parent / "data" / "db.json")
+
+TAU2_BENCH_URL = (
+ "https://raw.githubusercontent.com/sierra-research/tau2-bench/"
+ "main/data/tau2/domains/retail/db.json"
+)
+
+
+def ensure_db(db_path: str) -> None:
+ """Check that the retail database exists; auto-download from tau2-bench if missing."""
+ p = Path(db_path)
+ if p.exists():
+ return
+ print(f"⚠️ Database not found: {db_path}")
+ print(f" Downloading from tau2-bench …")
+ p.parent.mkdir(parents=True, exist_ok=True)
+ try:
+ urllib.request.urlretrieve(TAU2_BENCH_URL, str(p))
+ print(f" ✅ Downloaded ({p.stat().st_size / 1_048_576:.1f} MB)")
+ except Exception as exc:
+ print(f" ❌ Download failed: {exc}")
+ print(f" Please download manually:")
+ print(f" curl -L -o {db_path} {TAU2_BENCH_URL}")
+ sys.exit(1)
+
+
+# Create the MCP server
+mcp = FastMCP(
+ "Retail Tools Server",
+ instructions="""You are a retail customer service agent. Use these tools to help customers with:
+- Finding their user ID by email or name+zip
+- Looking up order details and status
+- Cancelling pending orders
+- Modifying pending orders (items, address, payment)
+- Processing returns for delivered orders
+- Processing exchanges for delivered orders
+- Looking up product information
+
+Always verify the user's identity before making changes. Ask for confirmation before
+modifying orders or processing returns/exchanges."""
+)
+
+# Global database state
+_db: Optional[RetailDB] = None # Read-only template DB
+_original_db_path: str = "" # Path to the original pristine DB file
+_session_dbs: Dict[str, RetailDB] = {} # Per-session DB copies
+SESSION_DB_DIR = Path(__file__).resolve().parent.parent / "session_dbs"
+SESSION_DB_DIR.mkdir(exist_ok=True)
+
+
+def _normalize_order_id(order_id: str) -> str:
+ """Ensure order_id starts with '#'. Agents frequently omit it."""
+ order_id = order_id.strip()
+ if order_id and not order_id.startswith("#"):
+ order_id = "#" + order_id
+ return order_id
+
+
+def _session_db_file(session_id: str) -> Path:
+ safe = "".join(ch for ch in session_id if ch.isalnum() or ch in ("-", "_"))
+ if not safe:
+ safe = "session"
+ return SESSION_DB_DIR / f"{safe[:64]}.json"
+
+
+def get_db(session_id: str = "") -> RetailDB:
+ """Get the database for a given session.
+
+ If session_id is empty, returns the read-only template DB.
+ If session_id is provided, returns a per-session pristine copy
+ (created on first access from the original file).
+ This ensures each benchmark task gets its own clean database state.
+ """
+ global _db, _original_db_path
+
+ # Initialize template if needed
+ if _db is None:
+ db_path = os.environ.get("RETAIL_DB_PATH", DEFAULT_DB_PATH)
+ _original_db_path = db_path
+ _db = RetailDB.load(db_path)
+ _db._db_path = "" # Prevent accidental writes to original file
+ print(f"Loaded template database from {db_path}")
+ print(f" - {len(_db.products)} products")
+ print(f" - {len(_db.users)} users")
+ print(f" - {len(_db.orders)} orders")
+
+ if not session_id:
+ return _db
+
+ if session_id not in _session_dbs:
+ # Load fresh pristine copy from the original file
+ db = RetailDB.load(_original_db_path)
+ session_db_file = _session_db_file(session_id)
+ db._db_path = str(session_db_file)
+ _session_dbs[session_id] = db
+ print(f"🆕 Created pristine DB for session {session_id[:8]}... "
+ f"({len(_session_dbs)} active sessions)")
+
+ return _session_dbs[session_id]
+
+
+def _get_data_model_defs() -> Dict[str, dict]:
+ model_classes = [
+ Variant,
+ Product,
+ UserName,
+ UserAddress,
+ CreditCard,
+ PaypalAccount,
+ GiftCard,
+ User,
+ OrderItem,
+ OrderPayment,
+ OrderFullfilment,
+ Order,
+ ]
+ defs: Dict[str, dict] = {}
+ for model_cls in model_classes:
+ schema = model_cls.model_json_schema(ref_template="#/$defs/{model}")
+ defs[model_cls.__name__] = {
+ "description": schema.get("description", ""),
+ "properties": schema.get("properties", {}),
+ }
+ return defs
+
+
+def _get_tool_metadata_payload() -> Dict[str, Any]:
+ ordered_actions = [
+ "calculate",
+ "cancel_pending_order",
+ "exchange_delivered_order_items",
+ "find_user_id_by_email",
+ "find_user_id_by_name_zip",
+ "get_order_details",
+ "get_product_details",
+ "get_user_details",
+ "list_all_product_types",
+ "modify_pending_order_address",
+ "modify_pending_order_items",
+ "modify_pending_order_payment",
+ "modify_user_address",
+ "return_delivered_order_items",
+ "transfer_to_human_agents",
+ ]
+
+ return {
+ "ordered_actions": ordered_actions,
+ "return_types": {
+ "calculate": "str",
+ "cancel_pending_order": "str (JSON)",
+ "exchange_delivered_order_items": "str (JSON)",
+ "find_user_id_by_email": "str",
+ "find_user_id_by_name_zip": "str",
+ "get_order_details": "str (JSON)",
+ "get_product_details": "str (JSON)",
+ "get_user_details": "str (JSON)",
+ "list_all_product_types": "str (JSON)",
+ "modify_pending_order_address": "str (JSON)",
+ "modify_pending_order_items": "str (JSON)",
+ "modify_pending_order_payment": "str (JSON)",
+ "modify_user_address": "str (JSON)",
+ "return_delivered_order_items": "str (JSON)",
+ "transfer_to_human_agents": "str",
+ },
+ "semantic_types": {
+ "cancel_pending_order": "Order",
+ "exchange_delivered_order_items": "Order",
+ "get_order_details": "Order",
+ "get_product_details": "Product",
+ "get_user_details": "User",
+ "list_all_product_types": "dict[name, product_id]",
+ "modify_pending_order_address": "Order",
+ "modify_pending_order_items": "Order",
+ "modify_pending_order_payment": "Order",
+ "modify_user_address": "User",
+ "return_delivered_order_items": "Order",
+ },
+ "data_model_defs": _get_data_model_defs(),
+ }
+
+
+# ==================== READ TOOLS ====================
+
+@mcp.tool()
+def find_user_id_by_email(email: str, session_id: str = "") -> str:
+ """Find user id by email. Use this first to identify a customer.
+
+ Args:
+ email: The email of the user, such as 'something@example.com'.
+
+ Usage example:
+ user_id = actions.find_user_id_by_email("real_email_from_user@example.com")
+ print(user_id)
+
+ Notes:
+ - Use a real value obtained from the user.
+ - Do NOT use placeholders like "email" or "user@example.com" unless user provided it.
+
+ Returns:
+ The user id if found.
+ """
+ db = get_db(session_id)
+ for user_id, user in db.users.items():
+ if user.email.lower() == email.lower():
+ return user_id
+ raise ValueError("User not found")
+
+
+@mcp.tool()
+def find_user_id_by_name_zip(first_name: str, last_name: str, zip_code: str, session_id: str = "") -> str:
+ """Find user id by first name, last name, and zip code.
+ Use this if the customer cannot remember their email.
+
+ Args:
+ first_name: The first name of the customer, such as 'John'.
+ last_name: The last name of the customer, such as 'Doe'.
+ zip_code: The zip code of the customer, such as '12345'.
+
+ Usage example:
+ user_id = actions.find_user_id_by_name_zip("RealFirst", "RealLast", "12345")
+ print(user_id)
+
+ Notes:
+ - All three fields must come from the user.
+ - Do NOT use placeholders like "first_name", "last_name", "zip_code" as values.
+
+ Returns:
+ The user id if found.
+ """
+ db = get_db(session_id)
+ for user_id, user in db.users.items():
+ if (
+ user.name.first_name.lower() == first_name.lower()
+ and user.name.last_name.lower() == last_name.lower()
+ and user.address.zip == zip_code
+ ):
+ return user_id
+ raise ValueError("User not found")
+
+
+@mcp.tool()
+def get_order_details(order_id: str, session_id: str = "") -> str:
+ """Returns JSON with order info where items is a LIST of {item_id, product_id, price, ...} dicts and status is a string
+
+ Args:
+ order_id: The order id, such as '#W0000000'. Include the '#' symbol.
+
+ Example:
+ import json
+ order = json.loads(actions.get_order_details("#W1234567"))
+ print(order['status']) # 'pending', 'delivered', 'cancelled', etc.
+ for item in order['items']: # list of dicts
+ print(f"Item {item['item_id']}: ${item['price']}")
+ # order['payment_history'] = [{"payment_method_id": "credit_card_123", "amount": 29.99}]
+ """
+ order_id = _normalize_order_id(order_id)
+ db = get_db(session_id)
+ if order_id not in db.orders:
+ raise ValueError("Order not found")
+ return db.orders[order_id].model_dump_json(indent=2)
+
+
+@mcp.tool()
+def get_product_details(product_id: str, session_id: str = "") -> str:
+ """Returns JSON with product info where variants is a DICT {item_id: info} — use .items() to iterate, NOT a list
+
+ Args:
+ product_id: The product id (numeric string). Get IDs from list_all_product_types(). Different from item_id.
+
+ Example:
+ import json
+ product = json.loads(actions.get_product_details(product_id))
+ for item_id, variant in product['variants'].items():
+ print(f"{item_id}: ${variant['price']} - {variant['options']} - Available: {variant['available']}")
+ print(f"Total variants: {len(product['variants'])}")
+ """
+ db = get_db(session_id)
+ if product_id not in db.products:
+ raise ValueError("Product not found")
+ return db.products[product_id].model_dump_json(indent=2)
+
+
+@mcp.tool()
+def get_user_details(user_id: str, session_id: str = "") -> str:
+ """Returns JSON with user info where orders is a list of ID strings and payment_methods is a DICT {pm_id: info} — use .items()
+
+ Args:
+ user_id: The user id, such as 'sara_doe_496'.
+
+ Example:
+ import json
+ user = json.loads(actions.get_user_details(user_id))
+ print(user['name']) # {"first_name": "Sara", "last_name": "Doe"}
+ for order_id in user['orders']: # list of strings like "#W0001"
+ print(order_id)
+ for pm_id, pm_info in user['payment_methods'].items():
+ print(f"{pm_id}: {pm_info}")
+ """
+ db = get_db(session_id)
+ if user_id not in db.users:
+ raise ValueError("User not found")
+ return db.users[user_id].model_dump_json(indent=2)
+
+
+@mcp.tool()
+def list_all_product_types(session_id: str = "") -> str:
+ """Returns JSON dict {name: product_id} — use .items() to iterate, NOT a list
+
+ Example:
+ import json
+ types = json.loads(actions.list_all_product_types())
+ # types = {"T-Shirt": "1001", "Jeans": "1002", ...}
+ for name, product_id in types.items():
+ print(f"{name}: {product_id}")
+ # To get full details: actions.get_product_details(product_id)
+ """
+ db = get_db(session_id)
+ product_dict = {
+ product.name: product.product_id for product in db.products.values()
+ }
+ return json.dumps(product_dict, sort_keys=True)
+
+
+# ==================== UTILITY TOOLS ====================
+
+@mcp.tool()
+def calculate(expression: str, session_id: str = "") -> str:
+ """Calculate the result of a mathematical expression.
+
+ Args:
+ expression: The mathematical expression, such as '2 + 2' or '100 * 0.1'.
+
+ Returns:
+ The calculated result as a string.
+ """
+ if not all(char in "0123456789+-*/(). " for char in expression):
+ raise ValueError("Invalid characters in expression")
+ return str(round(float(eval(expression, {"__builtins__": None}, {})), 2))
+
+
+@mcp.tool()
+def transfer_to_human_agents(summary: str, session_id: str = "") -> str:
+ """Transfer the customer to a human agent.
+ Only use this if the customer explicitly asks for a human agent, or
+ if you cannot solve their issue with the available tools.
+
+ Args:
+ summary: A summary of the customer's issue.
+
+ Returns:
+ Confirmation message.
+ """
+ return "Transfer successful"
+
+
+@mcp.tool()
+def get_execution_error_hint(error_msg: str, code: str = "", session_id: str = "") -> str:
+ """Return a recovery hint for sandbox execution/tool errors.
+
+ Args:
+ error_msg: The root error message produced by sandbox/tool execution.
+ code: The executed python code snippet (optional, used for pattern detection).
+
+ Returns:
+ str: A concise hint string. Empty string if no specific hint applies.
+ """
+ return analyze_execution_error(error_msg=error_msg, code=code, domain="retail")
+
+
+@mcp.tool()
+def get_tool_metadata(session_id: str = "") -> str:
+ """Return metadata used to build execute_python action/data-model description.
+
+ Returns:
+ JSON string with keys like return_types and data_model_defs.
+ """
+ return json.dumps(_get_tool_metadata_payload())
+
+
+# ==================== WRITE TOOLS ====================
+
+@mcp.tool()
+def cancel_pending_order(order_id: str, reason: str, session_id: str = "") -> str:
+ """Cancel a pending order. The order must be in 'pending' status.
+ Ask the customer for confirmation before cancelling.
+
+ Args:
+ order_id: The order id, such as '#W0000000'. Include the '#' symbol.
+ reason: Either 'no longer needed' or 'ordered by mistake'.
+
+ Returns:
+ A JSON STRING (not a dict). You MUST parse it: order = json.loads(result)
+ Contains updated order details showing cancelled status.
+ """
+ order_id = _normalize_order_id(order_id)
+ db = get_db(session_id)
+
+ if order_id not in db.orders:
+ raise ValueError("Order not found")
+ order = db.orders[order_id]
+
+ if order.status != "pending":
+ raise ValueError("Non-pending order cannot be cancelled")
+
+ if reason not in {"no longer needed", "ordered by mistake"}:
+ raise ValueError("Invalid reason")
+
+ # Handle refunds
+ refunds = []
+ for payment in order.payment_history:
+ payment_id = payment.payment_method_id
+ refund = OrderPayment(
+ transaction_type="refund",
+ amount=payment.amount,
+ payment_method_id=payment_id,
+ )
+ refunds.append(refund)
+
+ # Refund to gift card immediately
+ user = db.users[order.user_id]
+ if payment_id in user.payment_methods:
+ pm = user.payment_methods[payment_id]
+ if isinstance(pm, GiftCard):
+ pm.balance += payment.amount
+ pm.balance = round(pm.balance, 2)
+
+ order.status = "cancelled"
+ order.cancel_reason = reason
+ order.payment_history.extend(refunds)
+
+ db.save() # Persist changes to disk
+ return order.model_dump_json(indent=2)
+
+
+@mcp.tool()
+def exchange_delivered_order_items(
+ order_id: str,
+ item_ids: List[str],
+ new_item_ids: List[str],
+ payment_method_id: str,
+ session_id: str = "",
+) -> str:
+ """Exchange items in a delivered order for new items of the same product type.
+ Ask the customer for confirmation before processing.
+
+ Args:
+ order_id: The order id, such as '#W0000000'. Include the '#' symbol.
+ item_ids: List of item IDs to exchange, such as ['1008292230'].
+ new_item_ids: List of new item IDs to exchange for. Must match positions.
+ payment_method_id: Payment method ID for any price difference. MUST be a real ID from get_user_details() → user['payment_methods'] (e.g., 'credit_card_9513926', 'gift_card_1234567'). NEVER guess or use placeholders.
+
+ Returns:
+ A JSON STRING (not a dict). You MUST parse it: order = json.loads(result)
+ Contains updated order details showing exchange requested status.
+ """
+ order_id = _normalize_order_id(order_id)
+ db = get_db(session_id)
+
+ if order_id not in db.orders:
+ raise ValueError("Order not found")
+ order = db.orders[order_id]
+
+ if order.status != "delivered":
+ raise ValueError("Non-delivered order cannot be exchanged")
+
+ # Check items exist
+ all_item_ids = [item.item_id for item in order.items]
+ for item_id in item_ids:
+ if item_ids.count(item_id) > all_item_ids.count(item_id):
+ raise ValueError(f"Number of {item_id} not found.")
+
+ if len(item_ids) != len(new_item_ids):
+ raise ValueError("The number of items to be exchanged should match.")
+
+ # Calculate price difference
+ diff_price = 0
+ for item_id, new_item_id in zip(item_ids, new_item_ids):
+ item = next((i for i in order.items if i.item_id == item_id), None)
+ if item is None:
+ raise ValueError(f"Item {item_id} not found")
+
+ product = db.products.get(item.product_id)
+ if not product or new_item_id not in product.variants:
+ raise ValueError(f"New item {new_item_id} not found")
+
+ variant = product.variants[new_item_id]
+ if not variant.available:
+ raise ValueError(f"New item {new_item_id} not available")
+
+ diff_price += variant.price - item.price
+
+ diff_price = round(diff_price, 2)
+
+ # Check payment method
+ user = db.users[order.user_id]
+ if payment_method_id not in user.payment_methods:
+ raise ValueError("Payment method not found")
+
+ pm = user.payment_methods[payment_method_id]
+ if isinstance(pm, GiftCard) and pm.balance < diff_price:
+ raise ValueError("Insufficient gift card balance for price difference")
+
+ order.status = "exchange requested"
+ order.exchange_items = sorted(item_ids)
+ order.exchange_new_items = sorted(new_item_ids)
+ order.exchange_payment_method_id = payment_method_id
+ order.exchange_price_difference = diff_price
+
+ db.save() # Persist changes to disk
+ return order.model_dump_json(indent=2)
+
+
+@mcp.tool()
+def return_delivered_order_items(
+ order_id: str,
+ item_ids: List[str],
+ payment_method_id: str,
+ session_id: str = "",
+) -> str:
+ """Return items from a delivered order.
+ Ask the customer for confirmation before processing.
+
+ Args:
+ order_id: The order id, such as '#W0000000'. Include the '#' symbol.
+ item_ids: List of item IDs to return, such as ['1008292230'].
+ payment_method_id: Payment method ID for refund. Must be the original payment method or a gift card. MUST be a real ID from get_user_details() → user['payment_methods'] (e.g., 'credit_card_9513926', 'gift_card_1234567'). NEVER guess or use placeholders.
+
+ Returns:
+ A JSON STRING (not a dict). You MUST parse it: order = json.loads(result)
+ Contains updated order details showing return requested status.
+ """
+ order_id = _normalize_order_id(order_id)
+ db = get_db(session_id)
+
+ if order_id not in db.orders:
+ raise ValueError("Order not found")
+ order = db.orders[order_id]
+
+ if order.status != "delivered":
+ raise ValueError("Non-delivered order cannot be returned")
+
+ # Check payment method
+ user = db.users[order.user_id]
+ if payment_method_id not in user.payment_methods:
+ raise ValueError("Payment method not found")
+
+ pm = user.payment_methods[payment_method_id]
+ if (
+ not isinstance(pm, GiftCard)
+ and payment_method_id != order.payment_history[0].payment_method_id
+ ):
+ raise ValueError("Payment method should be the original payment method")
+
+ # Check items exist
+ all_item_ids = [item.item_id for item in order.items]
+ for item_id in item_ids:
+ if item_ids.count(item_id) > all_item_ids.count(item_id):
+ raise ValueError("Some item not found")
+
+ order.status = "return requested"
+ order.return_items = sorted(item_ids)
+ order.return_payment_method_id = payment_method_id
+
+ db.save() # Persist changes to disk
+ return order.model_dump_json(indent=2)
+
+
+@mcp.tool()
+def modify_pending_order_items(
+ order_id: str,
+ item_ids: List[str],
+ new_item_ids: List[str],
+ payment_method_id: str,
+ session_id: str = "",
+) -> str:
+ """Modify items in a pending order to new items of the same product type.
+ Can only be done once per order. Ask for confirmation before modifying.
+
+ Args:
+ order_id: The order id, such as '#W0000000'. Include the '#' symbol.
+ item_ids: List of item IDs to modify, such as ['1008292230'].
+ new_item_ids: List of new item IDs. Must match positions and be different items.
+ payment_method_id: Payment method ID for any price difference. MUST be a real ID from get_user_details() → user['payment_methods'] (e.g., 'credit_card_9513926', 'gift_card_1234567'). NEVER guess or use placeholders.
+
+ Returns:
+ A JSON STRING (not a dict). You MUST parse it: order = json.loads(result)
+ Contains updated order details.
+ """
+ order_id = _normalize_order_id(order_id)
+ db = get_db(session_id)
+
+ if order_id not in db.orders:
+ raise ValueError("Order not found")
+ order = db.orders[order_id]
+
+ if order.status != "pending":
+ raise ValueError("Non-pending order cannot be modified")
+
+ # Check items exist
+ all_item_ids = [item.item_id for item in order.items]
+ for item_id in item_ids:
+ if item_ids.count(item_id) > all_item_ids.count(item_id):
+ raise ValueError(f"{item_id} not found")
+
+ if len(item_ids) != len(new_item_ids):
+ raise ValueError("The number of items to be exchanged should match")
+
+ # Calculate price difference and validate
+ diff_price = 0
+ for item_id, new_item_id in zip(item_ids, new_item_ids):
+ if item_id == new_item_id:
+ raise ValueError("The new item id should be different from the old item id")
+
+ item = next((i for i in order.items if i.item_id == item_id), None)
+ if item is None:
+ raise ValueError(f"Item {item_id} not found")
+
+ product = db.products.get(item.product_id)
+ if not product or new_item_id not in product.variants:
+ raise ValueError(f"New item {new_item_id} not found")
+
+ variant = product.variants[new_item_id]
+ if not variant.available:
+ raise ValueError(f"New item {new_item_id} not available")
+
+ diff_price += variant.price - item.price
+
+ # Check payment method
+ user = db.users[order.user_id]
+ if payment_method_id not in user.payment_methods:
+ raise ValueError("Payment method not found")
+
+ pm = user.payment_methods[payment_method_id]
+ if isinstance(pm, GiftCard) and pm.balance < diff_price:
+ raise ValueError("Insufficient gift card balance to pay for the new item")
+
+ # Add payment record
+ order.payment_history.append(
+ OrderPayment(
+ transaction_type="payment" if diff_price > 0 else "refund",
+ amount=abs(diff_price),
+ payment_method_id=payment_method_id,
+ )
+ )
+
+ if isinstance(pm, GiftCard):
+ pm.balance -= diff_price
+ pm.balance = round(pm.balance, 2)
+
+ # Update items
+ for item_id, new_item_id in zip(item_ids, new_item_ids):
+ item = next((i for i in order.items if i.item_id == item_id), None)
+ product = db.products[item.product_id]
+ variant = product.variants[new_item_id]
+
+ item.item_id = new_item_id
+ item.price = variant.price
+ item.options = variant.options
+
+ order.status = "pending (item modified)"
+
+ db.save() # Persist changes to disk
+ return order.model_dump_json(indent=2)
+
+
+@mcp.tool()
+def modify_pending_order_address(
+ order_id: str,
+ address1: str,
+ address2: str,
+ city: str,
+ state: str,
+ country: str,
+ zip_code: str,
+ session_id: str = "",
+) -> str:
+ """Modify the shipping address of a pending order.
+ Ask for confirmation before modifying.
+
+ Args:
+ order_id: The order id, such as '#W0000000'. Include the '#' symbol.
+ address1: First line of address, such as '123 Main St'.
+ address2: Second line of address, such as 'Apt 1' or empty string.
+ city: City name.
+ state: State abbreviation, such as 'CA'.
+ country: Country, such as 'USA'.
+ zip_code: ZIP code, such as '12345'.
+
+ Returns:
+ A JSON STRING (not a dict). You MUST parse it: order = json.loads(result)
+ Contains updated order details.
+ """
+ order_id = _normalize_order_id(order_id)
+ db = get_db(session_id)
+
+ if order_id not in db.orders:
+ raise ValueError("Order not found")
+ order = db.orders[order_id]
+
+ if "pending" not in order.status:
+ raise ValueError("Non-pending order cannot be modified")
+
+ order.address = UserAddress(
+ address1=address1,
+ address2=address2,
+ city=city,
+ state=state,
+ country=country,
+ zip=zip_code,
+ )
+
+ db.save() # Persist changes to disk
+ return order.model_dump_json(indent=2)
+
+
+@mcp.tool()
+def modify_pending_order_payment(order_id: str, payment_method_id: str, session_id: str = "") -> str:
+ """Modify the payment method of a pending order.
+ Ask for confirmation before modifying.
+
+ Args:
+ order_id: The order id, such as '#W0000000'. Include the '#' symbol.
+ payment_method_id: New payment method ID. MUST be a real ID from get_user_details() → user['payment_methods'] (e.g., 'credit_card_9513926', 'gift_card_1234567'). NEVER guess or use placeholders.
+
+ Returns:
+ A JSON STRING (not a dict). You MUST parse it: order = json.loads(result)
+ Contains updated order details.
+ """
+ order_id = _normalize_order_id(order_id)
+ db = get_db(session_id)
+
+ if order_id not in db.orders:
+ raise ValueError("Order not found")
+ order = db.orders[order_id]
+
+ if "pending" not in order.status:
+ raise ValueError("Non-pending order cannot be modified")
+
+ # Check payment method exists
+ user = db.users[order.user_id]
+ if payment_method_id not in user.payment_methods:
+ raise ValueError("Payment method not found")
+
+ pm = user.payment_methods[payment_method_id]
+
+ # Validate payment history
+ if (
+ len(order.payment_history) != 1
+ or order.payment_history[0].transaction_type != "payment"
+ ):
+ raise ValueError("There should be exactly one payment for a pending order")
+
+ if order.payment_history[0].payment_method_id == payment_method_id:
+ raise ValueError("The new payment method should be different from the current one")
+
+ amount = order.payment_history[0].amount
+
+ if isinstance(pm, GiftCard) and pm.balance < amount:
+ raise ValueError("Insufficient gift card balance to pay for the order")
+
+ # Add new payment and refund records
+ order.payment_history.extend([
+ OrderPayment(
+ transaction_type="payment",
+ amount=amount,
+ payment_method_id=payment_method_id,
+ ),
+ OrderPayment(
+ transaction_type="refund",
+ amount=amount,
+ payment_method_id=order.payment_history[0].payment_method_id,
+ ),
+ ])
+
+ # Update gift card balances
+ if isinstance(pm, GiftCard):
+ pm.balance -= amount
+ pm.balance = round(pm.balance, 2)
+
+ old_pm_id = order.payment_history[0].payment_method_id
+ if old_pm_id in user.payment_methods:
+ old_pm = user.payment_methods[old_pm_id]
+ if isinstance(old_pm, GiftCard):
+ old_pm.balance += amount
+ old_pm.balance = round(old_pm.balance, 2)
+
+ db.save() # Persist changes to disk
+ return order.model_dump_json(indent=2)
+
+
+@mcp.tool()
+def modify_user_address(
+ user_id: str,
+ address1: str,
+ address2: str,
+ city: str,
+ state: str,
+ country: str,
+ zip_code: str,
+ session_id: str = "",
+) -> str:
+ """Modify the default address of a user.
+ Ask for confirmation before modifying.
+
+ Args:
+ user_id: The user id, such as 'sara_doe_496'.
+ address1: First line of address.
+ address2: Second line of address.
+ city: City name.
+ state: State abbreviation.
+ country: Country.
+ zip_code: ZIP code.
+
+ Returns:
+ A JSON STRING (not a dict). You MUST parse it: user = json.loads(result)
+ Contains updated user details.
+ """
+ db = get_db(session_id)
+
+ if user_id not in db.users:
+ raise ValueError("User not found")
+ user = db.users[user_id]
+
+ user.address = UserAddress(
+ address1=address1,
+ address2=address2,
+ city=city,
+ state=state,
+ country=country,
+ zip=zip_code,
+ )
+
+ db.save() # Persist changes to disk
+ return user.model_dump_json(indent=2)
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser(description="Retail MCP Server")
+ parser.add_argument(
+ "--db-path",
+ default=DEFAULT_DB_PATH,
+ help="Path to the retail database JSON file"
+ )
+ parser.add_argument(
+ "--port",
+ type=int,
+ default=5050,
+ help="Port to run the SSE server on"
+ )
+ parser.add_argument(
+ "--host",
+ default="0.0.0.0",
+ help="Host to bind to"
+ )
+ parser.add_argument(
+ "--transport",
+ choices=["sse", "stdio"],
+ default="sse",
+ help="Transport protocol to use"
+ )
+
+ args = parser.parse_args()
+
+ # Set DB path via environment variable
+ os.environ["RETAIL_DB_PATH"] = args.db_path
+
+ # Ensure DB exists (auto-download from tau2-bench if missing)
+ ensure_db(args.db_path)
+
+ # Initialize template DB (read-only, never written to)
+ get_db()
+ print(f" Original DB file is READ-ONLY (per-session copies used for mutations)")
+ print(f" Session DB dir: {SESSION_DB_DIR}")
+
+ print(f"\n🚀 Starting Retail MCP Server...")
+ print(f" Transport: {args.transport}")
+ if args.transport == "sse":
+ print(f" Host: {args.host}")
+ print(f" Port: {args.port}")
+ print(f" SSE endpoint: http://{args.host}:{args.port}/sse")
+
+ # Run the server
+ mcp.run(transport=args.transport, host=args.host, port=args.port)
diff --git a/sample_solutions/AgenticCodeExecution/examples/retail/retail-system-prompt.txt b/sample_solutions/AgenticCodeExecution/examples/retail/retail-system-prompt.txt
new file mode 100644
index 00000000..6fff23b3
--- /dev/null
+++ b/sample_solutions/AgenticCodeExecution/examples/retail/retail-system-prompt.txt
@@ -0,0 +1,346 @@
+
+You are a customer service agent that helps the user according to the policy provided below.
+
+============================================================
+SECTION 1 — GENERIC EXECUTE_PYTHON INSTRUCTIONS
+============================================================
+
+HOW YOU WORK:
+You have ONE tool: execute_python. It runs Python code in a sandbox.
+Inside that sandbox, an `actions` object is pre-loaded with methods you call to perform operations (look up records, search options, modify state, etc.).
+The full list of available actions and their parameters is documented in the execute_python tool description — READ IT before writing code.
+
+TURN STRUCTURE (STRICT):
+In each turn you must do EXACTLY ONE of the following — never both:
+ A) Send a text message to the user, OR
+ B) Make an execute_python call.
+You MUST NOT combine a message and a tool call in the same turn.
+In particular, if execute_python returned an error, your next turn must be ONLY an execute_python call with fixed code — no text, no apology, no explanation.
+
+CRITICAL — WRITE COMPLETE SCRIPTS:
+A single execute_python call can contain MANY actions.* calls chained together in one script.
+You MUST combine all related steps into ONE execute_python call.
+Do NOT make separate execute_python calls for each individual action.
+
+Think about what information you need, then write ONE script that gathers and processes ALL of it.
+Only make a second execute_python call if the first one fails or if you need user input before continuing.
+
+SANDBOX ENVIRONMENT:
+Your code runs in a restricted Python sandbox. These constraints apply:
+- ALLOWED IMPORTS: json, math, datetime, time, re, collections, itertools — nothing else
+- NO file I/O: open(), os, sys, subprocess, pathlib are all blocked
+- NO exec(), eval(), compile()
+- NO dunder attributes: __name__, __class__, __dict__ etc. are blocked
+- NO input(): extract information from the conversation instead
+- The `actions` object is ALREADY AVAILABLE — do not import it
+
+CRITICAL INPUT BAN (HIGHEST PRIORITY):
+- NEVER use input() in execute_python code.
+- NEVER use placeholder variables such as `input`, `Input`, or `user_input`.
+- NEVER write code like `email = input(...)` or `value = Input`.
+- If any value is missing, ask the user in a normal assistant message (not in Python code).
+- In execute_python code, only use values already present in conversation/tool outputs.
+- Any script using input() is invalid and must be rewritten before execution.
+
+CODE GENERATION RULES:
+
+1. Treat actions.* outputs by TYPE:
+ - If output is structured data (object/list), parse with json.loads() before field access.
+ - If output is a scalar (e.g., identifier/status string), use it directly.
+
+ Safe pattern:
+ import json
+ raw = actions.ANY_METHOD(args)
+ # parse when raw looks like structured JSON; otherwise use raw directly
+ data = json.loads(raw) if isinstance(raw, str) and raw.lstrip().startswith('[') else raw
+ # for JSON objects, use the same idea: parse when output is structured JSON text
+
+ ❌ WRONG — accessing fields on the raw JSON string:
+ result = actions.some_write_method(...)
+ print(result['status']) # CRASH: result is a STRING, not a dict
+
+ ✅ CORRECT — parse first, then access:
+ result = json.loads(actions.some_write_method(...))
+ print(result['status'])
+
+2. ALWAYS print results — print() is the ONLY way to see output:
+ print(data)
+
+3. DICT vs LIST — read the signature line for each action:
+ Many actions return DICTS (not lists). Iterating a dict gives you KEYS (strings), not objects.
+
+ ❌ WRONG — treats dict as list of objects:
+ for item in data:
+ print(item['name']) # CRASH: item is a string key, not a dict
+
+ ✅ CORRECT — use .items() for dicts:
+ for key, value in data.items():
+ print(key, value)
+
+ When unsure, print the data first: print(type(data), data)
+
+4. STATELESS: Variables do NOT persist between execute_python calls.
+ Put ALL steps in ONE script.
+
+5. NEVER fabricate identifiers or option values.
+ Extract concrete values from tool outputs and reuse them exactly.
+ Never pass placeholder tokens like "user_id", "order_id", "item_id", "payment_method_id"
+ as actual values. Those are parameter NAMES, not real values.
+
+6. PRE-FLIGHT CHECKLIST before any state-changing action:
+ Before calling any write action, verify all required arguments come from current
+ data in THIS script and satisfy preconditions.
+
+ Generic checks:
+ - Every argument variable is defined before use (no undefined names).
+ - No input()/Input/user_input usage anywhere in the script.
+ - Entities referenced by the action are confirmed to exist in retrieved data.
+ - Current state allows the action (e.g., status/eligibility/business constraints).
+ - Argument values are concrete runtime values (not placeholder strings).
+
+ Pattern:
+ # 1) Read current state
+ state = json.loads(actions.some_read_method(...))
+ print(state)
+
+ # 2) Validate preconditions and required values
+ # (extract concrete values from state; do not invent placeholders)
+
+ # 3) Execute write action only after checks
+ result = json.loads(actions.some_write_method(...))
+ print(result)
+
+7. LOOKUP + EXECUTION GUARDRAILS (GENERIC):
+ A) LOOKUP FALLBACK (max 2 tries):
+ - Try one primary lookup strategy.
+ - If it fails, try one alternative strategy.
+ - If both fail, DO NOT guess values and DO NOT run write actions.
+ Ask the user for the missing value(s) in a normal assistant message.
+
+ B) NEVER USE PLACEHOLDER LITERALS AS REAL VALUES:
+ Parameter names are not runtime values.
+ Forbidden as actual values: "user_id", "order_id", "item_id", "product_id",
+ "payment_method_id", "email", "first_name", "last_name", "zip", etc.
+
+ C) READ-THEN-WRITE DISCIPLINE:
+ Before any write action, explicitly read current state and confirm:
+ - target entity exists
+ - operation is allowed in current state
+ - selected replacement/option/value is valid in current data
+
+ D) ARGUMENT SANITY CHECKS BEFORE WRITE:
+ For each required argument:
+ - it is defined in this script
+ - it is non-empty and concrete (not placeholder text)
+ - it was derived from conversation/tool output
+ Print chosen argument values before the write call.
+
+ E) TWO-PHASE SCRIPT TEMPLATE:
+ - Phase 1: gather and print all required facts
+ - Phase 2: execute write action only if checks pass
+
+8. MANDATORY PRE-EXECUTION SELF-CHECK (RUN THIS MENTALLY BEFORE EVERY execute_python CALL):
+ - No `input()`, `input`, `Input`, or `user_input` anywhere in code.
+ - No placeholder literals used as real values (`"user_id"`, `"order_id"`, etc.).
+ - Every variable used in arguments is defined in THIS script.
+ - For every critical value, print it before write actions.
+ - If a lookup failed twice, DO NOT continue with write actions; ask user for corrected value.
+
+9. STRICT LOOKUP FAILURE RULE:
+ If authentication/lookup fails (target entity not found), you MUST stop mutation flow.
+ Do not call write actions after unresolved lookup failure.
+ Your next action is either:
+ - one alternative lookup attempt, OR
+ - ask the user for corrected identifying information.
+
+SILENT ERROR RECOVERY (MANDATORY):
+When execute_python returns an error, you MUST:
+1. NOT produce any text message — no apology, no explanation, no "let me try again"
+2. Immediately make another execute_python call with corrected code
+3. The user must NEVER know an error occurred
+
+This means your very next turn after an error is ONLY a tool call — zero words to the user.
+
+❌ WRONG: "I apologize for the error. Let me try a different approach." + execute_python
+❌ WRONG: "Let me fix that." + execute_python
+❌ WRONG: Any text at all before or alongside the retry
+✅ CORRECT: execute_python (with fixed code, no accompanying text)
+
+Only speak to the user when you have a successful result or need information from them.
+If after 3 retries you still cannot complete the task, you may say there is a temporary technical issue — but never mention implementation details.
+
+ANTI-LOOP GUARD (GENERIC):
+- If you hit the same error class twice in a row (e.g., repeated "not found" or repeated type/index error),
+ stop retrying variations and switch strategy:
+ 1) one alternative lookup/validation path, then
+ 2) ask user for the missing/corrected value if still unresolved.
+- Do not burn steps by repeating near-identical failing scripts.
+
+============================================================
+SECTION 2 — USE-CASE SPECIFIC EXAMPLES (RETAIL)
+============================================================
+
+CRITICAL — WRITE COMPLETE SCRIPTS EXAMPLE:
+
+❌ WRONG (multiple execute_python calls, one action each):
+ Turn 1: execute_python → actions.find_user_id_by_email("user@example.com")
+ Turn 2: execute_python → actions.get_user_details("user_123")
+ Turn 3: execute_python → actions.get_order_details("#W456")
+
+✅ CORRECT (one execute_python call with a complete script):
+ execute_python →
+ import json
+ user_id = actions.find_user_id_by_email("user@example.com")
+ print(f"User ID: {{user_id}}")
+ user = json.loads(actions.get_user_details(user_id))
+ print(f"Name: {{user['name']}}")
+ order = json.loads(actions.get_order_details("#W456"))
+ print(f"Order status: {{order['status']}}")
+
+IDENTIFIER REUSE EXAMPLE:
+ user = json.loads(actions.get_user_details(user_id))
+ for pm_id, pm_info in user['payment_methods'].items():
+ print(f"{{pm_id}}: {{pm_info}}") # pm_id IS the payment method ID — use it exactly
+
+TRANSFER TO HUMAN AGENT:
+To transfer, make an execute_python call with code: actions.transfer_to_human_agents("summary of the issue"), then send the message 'YOU ARE BEING TRANSFERRED TO A HUMAN AGENT. PLEASE HOLD ON.' to the user.
+
+
+# Retail agent policy
+
+As a retail agent, you can help users:
+
+- **cancel or modify pending orders**
+- **return or exchange delivered orders**
+- **modify their default user address**
+- **provide information about their own profile, orders, and related products**
+
+At the beginning of the conversation, you have to authenticate the user identity by locating their user id via email, or via name + zip code. This has to be done even when the user already provides the user id.
+
+Once the user has been authenticated, you can provide the user with information about order, product, profile information, e.g. help the user look up order id.
+
+You can only help one user per conversation (but you can handle multiple requests from the same user), and must deny any requests for tasks related to any other user.
+
+Before taking any action that updates the database (cancel, modify, return, exchange), you must list the action details and obtain explicit user confirmation (yes) to proceed.
+
+You should not make up any information or knowledge or procedures not provided by the user or the tools, or give subjective recommendations or comments.
+
+You should at most make one tool call at a time, and if you take a tool call, you should not respond to the user at the same time. If you respond to the user, you should not make a tool call at the same time.
+
+You should deny user requests that are against this policy.
+
+You should transfer the user to a human agent if and only if the request cannot be handled within the scope of your actions. To transfer, first make a tool call to transfer_to_human_agents, and then send the message 'YOU ARE BEING TRANSFERRED TO A HUMAN AGENT. PLEASE HOLD ON.' to the user.
+
+## Domain basics
+
+- All times in the database are EST and 24 hour based. For example "02:30:00" means 2:30 AM EST.
+
+### User
+
+Each user has a profile containing:
+
+- unique user id
+- email
+- default address
+- payment methods.
+
+There are three types of payment methods: **gift card**, **paypal account**, **credit card**.
+
+### Product
+
+Our retail store has 50 types of products.
+
+For each **type of product**, there are **variant items** of different **options**.
+
+For example, for a 't-shirt' product, there could be a variant item with option 'color blue size M', and another variant item with option 'color red size L'.
+
+Each product has the following attributes:
+
+- unique product id
+- name
+- list of variants
+
+Each variant item has the following attributes:
+
+- unique item id
+- information about the value of the product options for this item.
+- availability
+- price
+
+Note: Product ID and Item ID have no relations and should not be confused!
+
+### Order
+
+Each order has the following attributes:
+
+- unique order id
+- user id
+- address
+- items ordered
+- status
+- fulfillment info (tracking id and item ids)
+- payment history
+
+The status of an order can be: **pending**, **processed**, **delivered**, or **cancelled**.
+
+Orders can have other optional attributes based on the actions that have been taken (cancellation reason, which items have been exchanged, what was the exchange price difference etc)
+
+## Generic action rules
+
+Generally, you can only take action on pending or delivered orders.
+
+Exchange or modify order tools can only be called once per order. Be sure that all items to be changed are collected into a list before making the tool call!!!
+
+## Cancel pending order
+
+An order can only be cancelled if its status is 'pending', and you should check its status before taking the action.
+
+The user needs to confirm the order id and the reason (either 'no longer needed' or 'ordered by mistake') for cancellation. Other reasons are not acceptable.
+
+After user confirmation, the order status will be changed to 'cancelled', and the total will be refunded via the original payment method immediately if it is gift card, otherwise in 5 to 7 business days.
+
+## Modify pending order
+
+An order can only be modified if its status is 'pending', and you should check its status before taking the action.
+
+For a pending order, you can take actions to modify its shipping address, payment method, or product item options, but nothing else.
+
+### Modify payment
+
+The user can only choose a single payment method different from the original payment method.
+
+If the user wants to modify the payment method to a gift card, it must have enough balance to cover the total amount.
+
+After user confirmation, the order status will be kept as 'pending'. The original payment method will be refunded immediately if it is a gift card, otherwise it will be refunded within 5 to 7 business days.
+
+### Modify items
+
+This action can only be called once, and will change the order status to 'pending (item modified)'. The agent will not be able to modify or cancel the order anymore. So you must confirm all the details are correct and be cautious before taking this action. In particular, remember to remind the customer to confirm they have provided all the items they want to modify.
+
+For a pending order, each item can be modified to an available new item of the same product but of different product option. There cannot be any change of product types, e.g. modify shirt to shoe.
+
+The user must provide a payment method to pay or receive refund of the price difference. If the user provides a gift card, it must have enough balance to cover the price difference.
+
+## Return delivered order
+
+An order can only be returned if its status is 'delivered', and you should check its status before taking the action.
+
+The user needs to confirm the order id and the list of items to be returned.
+
+The user needs to provide a payment method to receive the refund.
+
+The refund must either go to the original payment method, or an existing gift card.
+
+After user confirmation, the order status will be changed to 'return requested', and the user will receive an email regarding how to return items.
+
+## Exchange delivered order
+
+An order can only be exchanged if its status is 'delivered', and you should check its status before taking the action. In particular, remember to remind the customer to confirm they have provided all items to be exchanged.
+
+For a delivered order, each item can be exchanged to an available new item of the same product but of different product option. There cannot be any change of product types, e.g. modify shirt to shoe.
+
+The user must provide a payment method to pay or receive refund of the price difference. If the user provides a gift card, it must have enough balance to cover the price difference.
+
+After user confirmation, the order status will be changed to 'exchange requested', and the user will receive an email regarding how to return items. There is no need to place a new order.
+
+
\ No newline at end of file
diff --git a/sample_solutions/AgenticCodeExecution/examples/retail/retail_data_model.py b/sample_solutions/AgenticCodeExecution/examples/retail/retail_data_model.py
new file mode 100644
index 00000000..9438b4ad
--- /dev/null
+++ b/sample_solutions/AgenticCodeExecution/examples/retail/retail_data_model.py
@@ -0,0 +1,215 @@
+"""Data models for the retail domain - Standalone version without tau2 dependencies."""
+
+from __future__ import annotations
+
+import json
+from pathlib import Path
+from typing import Any, Dict, List, Literal, Optional, Union
+
+from pydantic import BaseModel, Field
+
+
+class Variant(BaseModel):
+ """Represents a specific variant of a product with unique options"""
+
+ item_id: str = Field(description="Unique identifier for the item")
+ options: Dict[str, str] = Field(description="Options of the item, e.g. color, size")
+ available: bool = Field(description="Whether the item is available")
+ price: float = Field(description="Price of the item")
+
+
+class Product(BaseModel):
+ """Represents a product type with multiple variants"""
+
+ name: str = Field(description="Name of the product")
+ product_id: str = Field(description="Unique identifier for the product")
+ variants: Dict[str, Variant] = Field(
+ description="Dictionary of variants indexed by item ID"
+ )
+
+
+class UserName(BaseModel):
+ """User's full name"""
+
+ first_name: str = Field(description="First name")
+ last_name: str = Field(description="Last name")
+
+
+class UserAddress(BaseModel):
+ """User's address details"""
+
+ address1: str = Field(description="First line of the address")
+ address2: str = Field(description="Second line of the address")
+ city: str = Field(description="City")
+ state: str = Field(description="State")
+ country: str = Field(description="Country")
+ zip: str = Field(description="ZIP code")
+
+
+class CreditCard(BaseModel):
+ """Credit card payment method"""
+
+ source: Literal["credit_card"] = "credit_card"
+ brand: str = Field(description="Credit card brand")
+ last_four: str = Field(description="Last four digits of the credit card")
+ id: str = Field(description="Unique identifier for the credit card")
+
+
+class PaypalAccount(BaseModel):
+ """PayPal payment method"""
+
+ source: Literal["paypal"] = "paypal"
+ id: str = Field(description="Unique identifier for the PayPal account")
+
+
+class GiftCard(BaseModel):
+ """Gift card payment method"""
+
+ source: Literal["gift_card"] = "gift_card"
+ balance: float = Field(description="Balance of the gift card")
+ id: str = Field(description="Unique identifier for the gift card")
+
+
+PaymentMethod = Union[CreditCard, PaypalAccount, GiftCard]
+
+
+class User(BaseModel):
+ """Represents a customer with their details and orders"""
+
+ user_id: str = Field(description="Unique identifier for the user")
+ name: UserName = Field(description="Name of the user")
+ address: UserAddress = Field(description="Address of the user")
+ email: str = Field(description="Email of the user")
+ payment_methods: Dict[str, PaymentMethod] = Field(
+ description="Payment methods of the user"
+ )
+ orders: List[str] = Field(description="Order IDs of the user's orders")
+
+
+class OrderItem(BaseModel):
+ """An item within an order"""
+
+ name: str = Field(description="Name of the item")
+ product_id: str = Field(description="Product ID of the item")
+ item_id: str = Field(description="Item ID of the item")
+ price: float = Field(description="Price of the item")
+ options: Dict[str, str] = Field(description="Options of the item")
+
+
+class OrderPayment(BaseModel):
+ """A payment transaction for an order"""
+
+ transaction_type: Literal["payment", "refund"] = Field(
+ description="Type of the transaction"
+ )
+ amount: float = Field(description="Amount of the transaction")
+ payment_method_id: str = Field(
+ description="Payment method ID of the transaction"
+ )
+
+
+OrderStatus = Literal[
+ "pending",
+ "pending (item modified)",
+ "processed",
+ "shipped",
+ "delivered",
+ "cancelled",
+ "return requested",
+ "exchange requested",
+]
+
+
+CancelReason = Literal["no longer needed", "ordered by mistake"]
+
+
+class OrderFullfilment(BaseModel):
+ """Fulfillment details for an order"""
+
+ tracking_id: List[str] = Field(description="Tracking IDs of the order")
+ item_ids: List[str] = Field(description="Item IDs of the order")
+
+
+class Order(BaseModel):
+ """Represents an order with its items, status, fulfillment and payment details"""
+
+ order_id: str = Field(description="Unique identifier for the order")
+ user_id: str = Field(description="Unique identifier for the user")
+ address: UserAddress = Field(description="Address of the user")
+ items: List[OrderItem] = Field(description="Items in the order")
+ status: OrderStatus = Field(description="Status of the order")
+ fulfillments: List[OrderFullfilment] = Field(
+ description="Fulfillments of the order"
+ )
+ payment_history: List[OrderPayment] = Field(description="Payments of the order")
+ cancel_reason: Optional[CancelReason] = Field(
+ description="Reason for cancelling the order",
+ default=None,
+ )
+ exchange_items: Optional[List[str]] = Field(
+ description="Items to be exchanged", default=None
+ )
+ exchange_new_items: Optional[List[str]] = Field(
+ description="Items exchanged for", default=None
+ )
+ exchange_payment_method_id: Optional[str] = Field(
+ description="Payment method ID for the exchange", default=None
+ )
+ exchange_price_difference: Optional[float] = Field(
+ description="Price difference for the exchange", default=None
+ )
+ return_items: Optional[List[str]] = Field(
+ description="Items to be returned", default=None
+ )
+ return_payment_method_id: Optional[str] = Field(
+ description="Payment method ID for the return", default=None
+ )
+
+
+class RetailDB(BaseModel):
+ """Database containing all retail-related data including products, users and orders"""
+
+ model_config = {"extra": "allow"}
+
+ products: Dict[str, Product] = Field(
+ description="Dictionary of all products indexed by product ID"
+ )
+ users: Dict[str, User] = Field(
+ description="Dictionary of all users indexed by user ID"
+ )
+ orders: Dict[str, Order] = Field(
+ description="Dictionary of all orders indexed by order ID"
+ )
+
+ _db_path: str = ""
+
+ @classmethod
+ def load(cls, path: str | Path) -> "RetailDB":
+ """Load the database from a JSON file."""
+ with open(path, "r") as f:
+ data = json.load(f)
+ db = cls.model_validate(data)
+ db._db_path = str(path)
+ return db
+
+ def save(self) -> None:
+ """Save the database back to the JSON file."""
+ if self._db_path:
+ with open(self._db_path, "w") as f:
+ json.dump(self.model_dump(exclude={"_db_path"}), f, indent=2)
+ print(f"Database saved to {self._db_path}")
+
+ def get_statistics(self) -> Dict[str, Any]:
+ """Get the statistics of the database."""
+ num_products = len(self.products)
+ num_users = len(self.users)
+ num_orders = len(self.orders)
+ total_num_items = sum(
+ len(product.variants) for product in self.products.values()
+ )
+ return {
+ "num_products": num_products,
+ "num_users": num_users,
+ "num_orders": num_orders,
+ "total_num_items": total_num_items,
+ }
diff --git a/sample_solutions/AgenticCodeExecution/examples/stocks/data/db.json b/sample_solutions/AgenticCodeExecution/examples/stocks/data/db.json
new file mode 100644
index 00000000..ae03980c
--- /dev/null
+++ b/sample_solutions/AgenticCodeExecution/examples/stocks/data/db.json
@@ -0,0 +1,452 @@
+{
+ "accounts": {
+ "acc_jane_001": {
+ "account_id": "acc_jane_001",
+ "name": "Jane Miller",
+ "email": "jane.miller@example.com",
+ "cash_balance": 25000.0,
+ "positions": {
+ "AAPL": {"quantity": 40, "avg_cost": 176.25},
+ "MSFT": {"quantity": 12, "avg_cost": 401.1},
+ "NVDA": {"quantity": 18, "avg_cost": 109.45},
+ "AMZN": {"quantity": 15, "avg_cost": 171.9}
+ },
+ "watchlist": ["TSLA", "META", "AMD", "NFLX"],
+ "order_ids": ["ORD-000001", "ORD-000002", "ORD-000004", "ORD-000005"]
+ },
+ "acc_marcus_002": {
+ "account_id": "acc_marcus_002",
+ "name": "Marcus Lee",
+ "email": "marcus.lee@example.com",
+ "cash_balance": 8400.5,
+ "positions": {
+ "TSLA": {"quantity": 8, "avg_cost": 218.8},
+ "GOOGL": {"quantity": 9, "avg_cost": 158.3},
+ "XOM": {"quantity": 30, "avg_cost": 104.7}
+ },
+ "watchlist": ["AAPL", "MSFT", "JPM", "PFE"],
+ "order_ids": ["ORD-000003", "ORD-000006", "ORD-000007"]
+ },
+ "acc_sofia_003": {
+ "account_id": "acc_sofia_003",
+ "name": "Sofia Alvarez",
+ "email": "sofia.alvarez@example.com",
+ "cash_balance": 15230.75,
+ "positions": {
+ "META": {"quantity": 11, "avg_cost": 488.2},
+ "NFLX": {"quantity": 7, "avg_cost": 561.35},
+ "AMD": {"quantity": 40, "avg_cost": 151.8},
+ "BAC": {"quantity": 120, "avg_cost": 34.9}
+ },
+ "watchlist": ["NVDA", "AAPL", "UNH", "AMZN"],
+ "order_ids": ["ORD-000008", "ORD-000009", "ORD-000010"]
+ },
+ "acc_ethan_004": {
+ "account_id": "acc_ethan_004",
+ "name": "Ethan Brooks",
+ "email": "ethan.brooks@example.com",
+ "cash_balance": 50210.2,
+ "positions": {
+ "JPM": {"quantity": 55, "avg_cost": 179.4},
+ "XOM": {"quantity": 80, "avg_cost": 102.1},
+ "BRKB": {"quantity": 16, "avg_cost": 420.6},
+ "UNH": {"quantity": 6, "avg_cost": 534.25}
+ },
+ "watchlist": ["MSFT", "GOOGL", "PFE", "TSLA"],
+ "order_ids": ["ORD-000011", "ORD-000012"]
+ },
+ "acc_priya_005": {
+ "account_id": "acc_priya_005",
+ "name": "Priya Nair",
+ "email": "priya.nair@example.com",
+ "cash_balance": 11200.0,
+ "positions": {
+ "AAPL": {"quantity": 14, "avg_cost": 182.0},
+ "PFE": {"quantity": 210, "avg_cost": 30.15},
+ "BAC": {"quantity": 90, "avg_cost": 33.5},
+ "MSFT": {"quantity": 5, "avg_cost": 398.6}
+ },
+ "watchlist": ["JPM", "BRKB", "NVDA", "AMD"],
+ "order_ids": ["ORD-000013", "ORD-000014"]
+ },
+ "acc_noah_006": {
+ "account_id": "acc_noah_006",
+ "name": "Noah Chen",
+ "email": "noah.chen@example.com",
+ "cash_balance": 18675.95,
+ "positions": {
+ "NVDA": {"quantity": 24, "avg_cost": 114.9},
+ "AMD": {"quantity": 65, "avg_cost": 148.1},
+ "AMZN": {"quantity": 10, "avg_cost": 174.25},
+ "GOOGL": {"quantity": 12, "avg_cost": 160.4},
+ "TSLA": {"quantity": 4, "avg_cost": 222.2}
+ },
+ "watchlist": ["META", "NFLX", "AAPL", "XOM"],
+ "order_ids": ["ORD-000015"]
+ }
+ },
+ "market": {
+ "AAPL": {
+ "symbol": "AAPL",
+ "name": "Apple Inc.",
+ "sector": "Technology",
+ "current_price": 189.42,
+ "day_open": 186.95,
+ "day_high": 190.3,
+ "day_low": 186.7,
+ "volume": 58100412
+ },
+ "MSFT": {
+ "symbol": "MSFT",
+ "name": "Microsoft Corp.",
+ "sector": "Technology",
+ "current_price": 412.75,
+ "day_open": 409.55,
+ "day_high": 414.1,
+ "day_low": 408.92,
+ "volume": 21245033
+ },
+ "NVDA": {
+ "symbol": "NVDA",
+ "name": "NVIDIA Corp.",
+ "sector": "Technology",
+ "current_price": 121.38,
+ "day_open": 118.44,
+ "day_high": 122.21,
+ "day_low": 117.98,
+ "volume": 132740120
+ },
+ "TSLA": {
+ "symbol": "TSLA",
+ "name": "Tesla Inc.",
+ "sector": "Automotive",
+ "current_price": 214.86,
+ "day_open": 219.2,
+ "day_high": 220.74,
+ "day_low": 213.41,
+ "volume": 94752113
+ },
+ "AMZN": {
+ "symbol": "AMZN",
+ "name": "Amazon.com Inc.",
+ "sector": "Consumer Discretionary",
+ "current_price": 178.91,
+ "day_open": 177.16,
+ "day_high": 179.33,
+ "day_low": 176.42,
+ "volume": 33659210
+ },
+ "GOOGL": {
+ "symbol": "GOOGL",
+ "name": "Alphabet Inc.",
+ "sector": "Communication Services",
+ "current_price": 163.27,
+ "day_open": 161.4,
+ "day_high": 163.8,
+ "day_low": 160.92,
+ "volume": 28992220
+ },
+ "META": {
+ "symbol": "META",
+ "name": "Meta Platforms Inc.",
+ "sector": "Communication Services",
+ "current_price": 512.34,
+ "day_open": 504.1,
+ "day_high": 513.9,
+ "day_low": 503.7,
+ "volume": 15440210
+ },
+ "AMD": {
+ "symbol": "AMD",
+ "name": "Advanced Micro Devices Inc.",
+ "sector": "Technology",
+ "current_price": 156.82,
+ "day_open": 153.9,
+ "day_high": 157.25,
+ "day_low": 152.8,
+ "volume": 61210340
+ },
+ "NFLX": {
+ "symbol": "NFLX",
+ "name": "Netflix Inc.",
+ "sector": "Communication Services",
+ "current_price": 589.76,
+ "day_open": 582.3,
+ "day_high": 591.2,
+ "day_low": 579.4,
+ "volume": 9812375
+ },
+ "JPM": {
+ "symbol": "JPM",
+ "name": "JPMorgan Chase & Co.",
+ "sector": "Financials",
+ "current_price": 183.64,
+ "day_open": 181.22,
+ "day_high": 184.5,
+ "day_low": 180.91,
+ "volume": 14287123
+ },
+ "XOM": {
+ "symbol": "XOM",
+ "name": "Exxon Mobil Corp.",
+ "sector": "Energy",
+ "current_price": 108.55,
+ "day_open": 107.4,
+ "day_high": 109.02,
+ "day_low": 106.95,
+ "volume": 19833110
+ },
+ "BRKB": {
+ "symbol": "BRKB",
+ "name": "Berkshire Hathaway Inc. Class B",
+ "sector": "Financials",
+ "current_price": 436.18,
+ "day_open": 431.7,
+ "day_high": 437.45,
+ "day_low": 430.9,
+ "volume": 4822010
+ },
+ "UNH": {
+ "symbol": "UNH",
+ "name": "UnitedHealth Group Inc.",
+ "sector": "Healthcare",
+ "current_price": 548.27,
+ "day_open": 541.3,
+ "day_high": 549.1,
+ "day_low": 539.8,
+ "volume": 4075190
+ },
+ "PFE": {
+ "symbol": "PFE",
+ "name": "Pfizer Inc.",
+ "sector": "Healthcare",
+ "current_price": 31.28,
+ "day_open": 30.95,
+ "day_high": 31.44,
+ "day_low": 30.81,
+ "volume": 42310980
+ },
+ "BAC": {
+ "symbol": "BAC",
+ "name": "Bank of America Corp.",
+ "sector": "Financials",
+ "current_price": 35.74,
+ "day_open": 35.29,
+ "day_high": 35.88,
+ "day_low": 35.12,
+ "volume": 52477035
+ },
+ "INTC": {
+ "symbol": "INTC",
+ "name": "Intel Corp.",
+ "sector": "Technology",
+ "current_price": 58.74,
+ "day_open": 57.90,
+ "day_high": 59.12,
+ "day_low": 57.45,
+ "volume": 47823540
+ }
+ },
+ "orders": {
+ "ORD-000001": {
+ "order_id": "ORD-000001",
+ "account_id": "acc_jane_001",
+ "symbol": "AAPL",
+ "side": "buy",
+ "order_type": "market",
+ "quantity": 10,
+ "limit_price": null,
+ "status": "filled",
+ "executed_price": 182.25,
+ "created_at": "2026-02-28T14:12:03Z",
+ "filled_at": "2026-02-28T14:12:03Z"
+ },
+ "ORD-000002": {
+ "order_id": "ORD-000002",
+ "account_id": "acc_jane_001",
+ "symbol": "NVDA",
+ "side": "buy",
+ "order_type": "limit",
+ "quantity": 5,
+ "limit_price": 115.0,
+ "status": "open",
+ "executed_price": null,
+ "created_at": "2026-03-01T09:45:10Z",
+ "filled_at": null
+ },
+ "ORD-000003": {
+ "order_id": "ORD-000003",
+ "account_id": "acc_marcus_002",
+ "symbol": "TSLA",
+ "side": "buy",
+ "order_type": "market",
+ "quantity": 3,
+ "limit_price": null,
+ "status": "filled",
+ "executed_price": 210.75,
+ "created_at": "2026-03-01T11:03:55Z",
+ "filled_at": "2026-03-01T11:03:55Z"
+ },
+ "ORD-000004": {
+ "order_id": "ORD-000004",
+ "account_id": "acc_jane_001",
+ "symbol": "AMZN",
+ "side": "buy",
+ "order_type": "limit",
+ "quantity": 6,
+ "limit_price": 176.0,
+ "status": "filled",
+ "executed_price": 175.8,
+ "created_at": "2026-03-02T10:18:12Z",
+ "filled_at": "2026-03-02T10:19:04Z"
+ },
+ "ORD-000005": {
+ "order_id": "ORD-000005",
+ "account_id": "acc_jane_001",
+ "symbol": "MSFT",
+ "side": "sell",
+ "order_type": "market",
+ "quantity": 4,
+ "limit_price": null,
+ "status": "filled",
+ "executed_price": 410.6,
+ "created_at": "2026-03-02T14:50:00Z",
+ "filled_at": "2026-03-02T14:50:00Z"
+ },
+ "ORD-000006": {
+ "order_id": "ORD-000006",
+ "account_id": "acc_marcus_002",
+ "symbol": "GOOGL",
+ "side": "buy",
+ "order_type": "market",
+ "quantity": 5,
+ "limit_price": null,
+ "status": "filled",
+ "executed_price": 159.1,
+ "created_at": "2026-03-03T12:22:19Z",
+ "filled_at": "2026-03-03T12:22:19Z"
+ },
+ "ORD-000007": {
+ "order_id": "ORD-000007",
+ "account_id": "acc_marcus_002",
+ "symbol": "XOM",
+ "side": "buy",
+ "order_type": "limit",
+ "quantity": 20,
+ "limit_price": 103.0,
+ "status": "open",
+ "executed_price": null,
+ "created_at": "2026-03-03T13:15:40Z",
+ "filled_at": null
+ },
+ "ORD-000008": {
+ "order_id": "ORD-000008",
+ "account_id": "acc_sofia_003",
+ "symbol": "META",
+ "side": "buy",
+ "order_type": "market",
+ "quantity": 4,
+ "limit_price": null,
+ "status": "filled",
+ "executed_price": 501.2,
+ "created_at": "2026-03-04T09:41:56Z",
+ "filled_at": "2026-03-04T09:41:56Z"
+ },
+ "ORD-000009": {
+ "order_id": "ORD-000009",
+ "account_id": "acc_sofia_003",
+ "symbol": "AMD",
+ "side": "buy",
+ "order_type": "limit",
+ "quantity": 25,
+ "limit_price": 149.0,
+ "status": "filled",
+ "executed_price": 149.0,
+ "created_at": "2026-03-04T10:03:21Z",
+ "filled_at": "2026-03-04T10:07:02Z"
+ },
+ "ORD-000010": {
+ "order_id": "ORD-000010",
+ "account_id": "acc_sofia_003",
+ "symbol": "BAC",
+ "side": "buy",
+ "order_type": "market",
+ "quantity": 60,
+ "limit_price": null,
+ "status": "filled",
+ "executed_price": 34.1,
+ "created_at": "2026-03-04T11:28:33Z",
+ "filled_at": "2026-03-04T11:28:33Z"
+ },
+ "ORD-000011": {
+ "order_id": "ORD-000011",
+ "account_id": "acc_ethan_004",
+ "symbol": "JPM",
+ "side": "buy",
+ "order_type": "market",
+ "quantity": 15,
+ "limit_price": null,
+ "status": "filled",
+ "executed_price": 180.2,
+ "created_at": "2026-03-05T15:16:48Z",
+ "filled_at": "2026-03-05T15:16:48Z"
+ },
+ "ORD-000012": {
+ "order_id": "ORD-000012",
+ "account_id": "acc_ethan_004",
+ "symbol": "UNH",
+ "side": "buy",
+ "order_type": "limit",
+ "quantity": 3,
+ "limit_price": 533.0,
+ "status": "filled",
+ "executed_price": 533.0,
+ "created_at": "2026-03-05T15:55:09Z",
+ "filled_at": "2026-03-05T16:01:44Z"
+ },
+ "ORD-000013": {
+ "order_id": "ORD-000013",
+ "account_id": "acc_priya_005",
+ "symbol": "PFE",
+ "side": "buy",
+ "order_type": "market",
+ "quantity": 100,
+ "limit_price": null,
+ "status": "filled",
+ "executed_price": 29.95,
+ "created_at": "2026-03-06T13:09:55Z",
+ "filled_at": "2026-03-06T13:09:55Z"
+ },
+ "ORD-000014": {
+ "order_id": "ORD-000014",
+ "account_id": "acc_priya_005",
+ "symbol": "AAPL",
+ "side": "buy",
+ "order_type": "limit",
+ "quantity": 8,
+ "limit_price": 181.5,
+ "status": "open",
+ "executed_price": null,
+ "created_at": "2026-03-06T13:40:17Z",
+ "filled_at": null
+ },
+ "ORD-000015": {
+ "order_id": "ORD-000015",
+ "account_id": "acc_noah_006",
+ "symbol": "AMD",
+ "side": "buy",
+ "order_type": "market",
+ "quantity": 20,
+ "limit_price": null,
+ "status": "filled",
+ "executed_price": 150.9,
+ "created_at": "2026-03-07T10:12:43Z",
+ "filled_at": "2026-03-07T10:12:43Z"
+ }
+ },
+ "meta": {
+ "next_order_id": 16,
+ "currency": "USD",
+ "updated_at": "2026-03-11T00:00:00Z"
+ }
+}
diff --git a/sample_solutions/AgenticCodeExecution/examples/stocks/mcp_stocks_server.py b/sample_solutions/AgenticCodeExecution/examples/stocks/mcp_stocks_server.py
new file mode 100644
index 00000000..410b93f5
--- /dev/null
+++ b/sample_solutions/AgenticCodeExecution/examples/stocks/mcp_stocks_server.py
@@ -0,0 +1,742 @@
+#!/usr/bin/env python3
+"""
+MCP Server for Demo Stocks Tools - Fully Standalone
+
+All business logic is directly in the MCP tools - no intermediate wrapper classes.
+"""
+
+import argparse
+import json
+import os
+import sys
+from copy import deepcopy
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+
+from fastmcp import FastMCP
+
+# Add parent directory to sys.path for shared modules (error_hints)
+sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
+
+from error_hints import analyze_execution_error
+from stocks_data_model import StocksDB
+
+
+DEFAULT_DB_PATH = str(Path(__file__).resolve().parent / "data" / "db.json")
+
+
+def ensure_db(db_path: str) -> None:
+ """Check that the stocks database exists; exit with instructions if missing."""
+ if Path(db_path).exists():
+ return
+ print(f"\n❌ Database not found: {db_path}")
+ print(f" The stocks database is included in the repository.")
+ print(f" Make sure the data/ directory is present (e.g. git checkout).")
+ sys.exit(1)
+
+
+mcp = FastMCP(
+ "Stocks Tools Server",
+ instructions="""You are a stock trading support agent. Use these tools to help users with:
+- Looking up account and portfolio details
+- Retrieving market quotes and symbol lists
+- Placing/cancelling market and limit orders
+- Reviewing order history and account balances
+
+Always verify account identity before any trade. Ask for explicit confirmation before placing or cancelling orders.""",
+)
+
+
+_db: Optional[Dict[str, Any]] = None
+_original_db_path: str = ""
+_session_dbs: Dict[str, Dict[str, Any]] = {}
+SESSION_DB_DIR = Path(__file__).resolve().parent.parent / "session_dbs"
+SESSION_DB_DIR.mkdir(exist_ok=True)
+
+
+def _now_iso() -> str:
+ return datetime.now(timezone.utc).replace(microsecond=0).isoformat().replace("+00:00", "Z")
+
+
+def _session_db_file(session_id: str) -> Path:
+ safe = "".join(ch for ch in session_id if ch.isalnum() or ch in ("-", "_"))
+ if not safe:
+ safe = "session"
+ return SESSION_DB_DIR / f"stocks_{safe[:64]}.json"
+
+
+def _load_db(path: str) -> Dict[str, Any]:
+ db = StocksDB.load(path)
+ return db.model_dump(mode="python")
+
+
+def _save_db(db: Dict[str, Any]) -> None:
+ db_path = db.get("_db_path", "")
+ if db_path:
+ persist = {k: v for k, v in db.items() if k != "_db_path"}
+ validated_db = StocksDB.model_validate(persist)
+ validated_db._db_path = db_path
+ validated_db.save()
+
+
+def get_db(session_id: str = "") -> Dict[str, Any]:
+ global _db, _original_db_path
+
+ if _db is None:
+ db_path = os.environ.get("STOCKS_DB_PATH", DEFAULT_DB_PATH)
+ _original_db_path = db_path
+ _db = _load_db(db_path)
+ _db["_db_path"] = ""
+ print(f"Loaded template stocks database from {db_path}")
+ print(f" - {len(_db.get('accounts', {}))} accounts")
+ print(f" - {len(_db.get('market', {}))} symbols")
+ print(f" - {len(_db.get('orders', {}))} orders")
+
+ if not session_id:
+ return _db
+
+ if session_id not in _session_dbs:
+ db = _load_db(_original_db_path)
+ db["_db_path"] = str(_session_db_file(session_id))
+ _session_dbs[session_id] = db
+ print(f"🆕 Created pristine stocks DB for session {session_id[:8]}... ({len(_session_dbs)} active sessions)")
+
+ return _session_dbs[session_id]
+
+
+def _normalize_symbol(symbol: str, db: Dict[str, Any]) -> str:
+ sym = symbol.strip().upper()
+ if sym not in db.get("market", {}):
+ raise ValueError(f"Symbol not found: {sym}")
+ return sym
+
+
+def _require_account(db: Dict[str, Any], account_id: str) -> Dict[str, Any]:
+ account = db.get("accounts", {}).get(account_id)
+ if not account:
+ raise ValueError("Account not found")
+ return account
+
+
+def _next_order_id(db: Dict[str, Any]) -> str:
+ next_id = int(db.get("meta", {}).get("next_order_id", 1))
+ order_id = f"ORD-{next_id:06d}"
+ db.setdefault("meta", {})["next_order_id"] = next_id + 1
+ return order_id
+
+
+def _account_snapshot(account: Dict[str, Any]) -> Dict[str, Any]:
+ return {
+ "account_id": account["account_id"],
+ "name": account["name"],
+ "email": account["email"],
+ "cash_balance": round(float(account["cash_balance"]), 2),
+ "positions": account.get("positions", {}),
+ "watchlist": account.get("watchlist", []),
+ "order_ids": account.get("order_ids", []),
+ }
+
+
+def _market_price(db: Dict[str, Any], symbol: str) -> float:
+ return float(db["market"][symbol]["current_price"])
+
+
+def _ensure_quantity(quantity: int) -> int:
+ if quantity <= 0:
+ raise ValueError("Quantity must be greater than 0")
+ return int(quantity)
+
+
+def _ensure_limit_price(limit_price: float) -> float:
+ if limit_price <= 0:
+ raise ValueError("limit_price must be greater than 0")
+ return round(float(limit_price), 2)
+
+
+def _apply_buy_fill(account: Dict[str, Any], symbol: str, quantity: int, price: float) -> None:
+ total_cost = round(quantity * price, 2)
+ if float(account["cash_balance"]) < total_cost:
+ raise ValueError("Insufficient cash balance")
+
+ positions = account.setdefault("positions", {})
+ existing = positions.get(symbol)
+ if existing:
+ old_qty = int(existing["quantity"])
+ old_avg = float(existing["avg_cost"])
+ new_qty = old_qty + quantity
+ new_avg = ((old_qty * old_avg) + (quantity * price)) / new_qty
+ positions[symbol] = {"quantity": new_qty, "avg_cost": round(new_avg, 4)}
+ else:
+ positions[symbol] = {"quantity": quantity, "avg_cost": round(price, 4)}
+
+ account["cash_balance"] = round(float(account["cash_balance"]) - total_cost, 2)
+
+
+def _apply_sell_fill(account: Dict[str, Any], symbol: str, quantity: int, price: float) -> None:
+ positions = account.setdefault("positions", {})
+ existing = positions.get(symbol)
+ if not existing or int(existing["quantity"]) < quantity:
+ raise ValueError("Insufficient shares to sell")
+
+ existing["quantity"] = int(existing["quantity"]) - quantity
+ if existing["quantity"] == 0:
+ del positions[symbol]
+
+ proceeds = round(quantity * price, 2)
+ account["cash_balance"] = round(float(account["cash_balance"]) + proceeds, 2)
+
+
+def _create_order(
+ db: Dict[str, Any],
+ account: Dict[str, Any],
+ symbol: str,
+ side: str,
+ order_type: str,
+ quantity: int,
+ limit_price: Optional[float],
+ status: str,
+ executed_price: Optional[float],
+) -> Dict[str, Any]:
+ order_id = _next_order_id(db)
+ now = _now_iso()
+ order = {
+ "order_id": order_id,
+ "account_id": account["account_id"],
+ "symbol": symbol,
+ "side": side,
+ "order_type": order_type,
+ "quantity": quantity,
+ "limit_price": limit_price,
+ "status": status,
+ "executed_price": executed_price,
+ "created_at": now,
+ "filled_at": now if status == "filled" else None,
+ }
+ db.setdefault("orders", {})[order_id] = order
+ account.setdefault("order_ids", []).append(order_id)
+ db.setdefault("meta", {})["updated_at"] = now
+ return order
+
+
+def _get_data_model_defs() -> Dict[str, dict]:
+ return {
+ "Position": {
+ "description": "Current holdings for a symbol",
+ "properties": {
+ "quantity": {"type": "integer"},
+ "avg_cost": {"type": "number"},
+ },
+ },
+ "Account": {
+ "description": "Trading account profile",
+ "properties": {
+ "account_id": {"type": "string"},
+ "name": {"type": "string"},
+ "email": {"type": "string"},
+ "cash_balance": {"type": "number"},
+ "positions": {"type": "object"},
+ "watchlist": {"type": "array"},
+ "order_ids": {"type": "array"},
+ },
+ },
+ "Quote": {
+ "description": "Current market quote",
+ "properties": {
+ "symbol": {"type": "string"},
+ "name": {"type": "string"},
+ "sector": {"type": "string"},
+ "current_price": {"type": "number"},
+ "day_open": {"type": "number"},
+ "day_high": {"type": "number"},
+ "day_low": {"type": "number"},
+ "volume": {"type": "integer"},
+ },
+ },
+ "Order": {
+ "description": "Trade order record",
+ "properties": {
+ "order_id": {"type": "string"},
+ "account_id": {"type": "string"},
+ "symbol": {"type": "string"},
+ "side": {"type": "string"},
+ "order_type": {"type": "string"},
+ "quantity": {"type": "integer"},
+ "limit_price": {"type": "number"},
+ "status": {"type": "string"},
+ "executed_price": {"type": "number"},
+ "created_at": {"type": "string"},
+ "filled_at": {"type": "string"},
+ },
+ },
+ "Mover": {
+ "description": "Simple market mover snapshot",
+ "properties": {
+ "symbol": {"type": "string"},
+ "name": {"type": "string"},
+ "percent_change": {"type": "number"},
+ "current_price": {"type": "number"},
+ },
+ },
+ }
+
+
+def _get_tool_metadata_payload() -> Dict[str, Any]:
+ ordered_actions = [
+ "calculate",
+ "find_account_id_by_email",
+ "get_account_summary",
+ "get_portfolio",
+ "get_quote",
+ "list_available_symbols",
+ "list_market_movers",
+ "get_order_history",
+ "place_market_buy",
+ "place_market_sell",
+ "place_limit_buy",
+ "place_limit_sell",
+ "cancel_open_order",
+ "transfer_to_human_agents",
+ ]
+
+ return {
+ "ordered_actions": ordered_actions,
+ "return_types": {
+ "calculate": "str",
+ "find_account_id_by_email": "str",
+ "get_account_summary": "str (JSON)",
+ "get_portfolio": "str (JSON)",
+ "get_quote": "str (JSON)",
+ "list_available_symbols": "str (JSON)",
+ "list_market_movers": "str (JSON)",
+ "get_order_history": "str (JSON)",
+ "place_market_buy": "str (JSON)",
+ "place_market_sell": "str (JSON)",
+ "place_limit_buy": "str (JSON)",
+ "place_limit_sell": "str (JSON)",
+ "cancel_open_order": "str (JSON)",
+ "transfer_to_human_agents": "str",
+ },
+ "semantic_types": {
+ "get_account_summary": "Account",
+ "get_portfolio": "dict[symbol, Position]",
+ "get_quote": "Quote",
+ "list_available_symbols": "dict[symbol, name]",
+ "list_market_movers": "list[Mover]",
+ "get_order_history": "list[Order]",
+ "place_market_buy": "Order",
+ "place_market_sell": "Order",
+ "place_limit_buy": "Order",
+ "place_limit_sell": "Order",
+ "cancel_open_order": "Order",
+ },
+ "data_model_defs": _get_data_model_defs(),
+ }
+
+
+@mcp.tool()
+def find_account_id_by_email(email: str, session_id: str = "") -> str:
+ """Find account id by email. Use this first to identify a customer.
+
+ Args:
+ email: Account email, such as 'jane.miller@example.com'.
+
+ Returns:
+ The account id if found.
+ """
+ db = get_db(session_id)
+ for account_id, account in db.get("accounts", {}).items():
+ if account.get("email", "").lower() == email.lower():
+ return account_id
+ raise ValueError("Account not found")
+
+
+@mcp.tool()
+def get_account_summary(account_id: str, session_id: str = "") -> str:
+ """Get account profile summary including cash balance and watchlist.
+
+ Args:
+ account_id: The trading account id.
+
+ Returns:
+ A JSON STRING (not a dict). You MUST parse it: account = json.loads(result)
+ """
+ db = get_db(session_id)
+ account = _require_account(db, account_id)
+ return json.dumps(_account_snapshot(account), indent=2)
+
+
+@mcp.tool()
+def get_portfolio(account_id: str, session_id: str = "") -> str:
+ """Get current portfolio positions for an account.
+
+ Args:
+ account_id: The trading account id.
+
+ Returns:
+ A JSON STRING of positions keyed by symbol.
+ """
+ db = get_db(session_id)
+ account = _require_account(db, account_id)
+ return json.dumps(account.get("positions", {}), indent=2)
+
+
+@mcp.tool()
+def get_quote(symbol: str, session_id: str = "") -> str:
+ """Get current market quote for a stock symbol.
+
+ Args:
+ symbol: Stock ticker symbol, such as 'AAPL'.
+
+ Returns:
+ A JSON STRING with quote fields.
+ """
+ db = get_db(session_id)
+ sym = _normalize_symbol(symbol, db)
+ return json.dumps(db["market"][sym], indent=2)
+
+
+@mcp.tool()
+def list_available_symbols(session_id: str = "") -> str:
+ """List all available symbols in the demo market.
+
+ Returns:
+ A JSON STRING dictionary of {symbol: company_name}.
+ """
+ db = get_db(session_id)
+ result = {
+ symbol: info.get("name", symbol)
+ for symbol, info in sorted(db.get("market", {}).items())
+ }
+ return json.dumps(result, indent=2)
+
+
+@mcp.tool()
+def list_market_movers(top_n: int = 5, session_id: str = "") -> str:
+ """List top movers by absolute percent change from day open.
+
+ Args:
+ top_n: Maximum number of movers to return.
+
+ Returns:
+ A JSON STRING list of mover objects.
+ """
+ db = get_db(session_id)
+ movers = []
+ for symbol, quote in db.get("market", {}).items():
+ day_open = float(quote["day_open"])
+ current = float(quote["current_price"])
+ pct = ((current - day_open) / day_open) * 100 if day_open else 0.0
+ movers.append(
+ {
+ "symbol": symbol,
+ "name": quote.get("name", symbol),
+ "percent_change": round(pct, 4),
+ "current_price": current,
+ }
+ )
+
+ movers.sort(key=lambda item: abs(item["percent_change"]), reverse=True)
+ return json.dumps(movers[: max(1, top_n)], indent=2)
+
+
+@mcp.tool()
+def get_order_history(account_id: str, limit: int = 20, session_id: str = "") -> str:
+ """Get recent order history for an account.
+
+ Args:
+ account_id: Trading account id.
+ limit: Maximum number of orders to return.
+
+ Returns:
+ A JSON STRING list of orders, newest first.
+ """
+ db = get_db(session_id)
+ account = _require_account(db, account_id)
+ all_orders = db.get("orders", {})
+ order_ids = list(account.get("order_ids", []))
+ order_ids.sort(reverse=True)
+
+ history = [all_orders[order_id] for order_id in order_ids if order_id in all_orders]
+ return json.dumps(history[: max(1, limit)], indent=2)
+
+
+@mcp.tool()
+def calculate(expression: str, session_id: str = "") -> str:
+ """Calculate the result of a mathematical expression.
+
+ Args:
+ expression: Expression such as '10000 * 0.05'.
+
+ Returns:
+ The calculated result as a string.
+ """
+ if not all(char in "0123456789+-*/(). " for char in expression):
+ raise ValueError("Invalid characters in expression")
+ return str(round(float(eval(expression, {"__builtins__": None}, {})), 6))
+
+
+@mcp.tool()
+def transfer_to_human_agents(summary: str, session_id: str = "") -> str:
+ """Transfer the customer to a human agent.
+
+ Returns:
+ Confirmation message.
+ """
+ return "Transfer successful"
+
+
+@mcp.tool()
+def get_execution_error_hint(error_msg: str, code: str = "", session_id: str = "") -> str:
+ """Return a recovery hint for sandbox execution/tool errors."""
+ return analyze_execution_error(error_msg=error_msg, code=code, domain="stocks")
+
+
+@mcp.tool()
+def get_tool_metadata(session_id: str = "") -> str:
+ """Return metadata used to build execute_python action/data-model description."""
+ return json.dumps(_get_tool_metadata_payload())
+
+
+@mcp.tool()
+def place_market_buy(account_id: str, symbol: str, quantity: int, session_id: str = "") -> str:
+ """Place a market buy order and execute immediately at current price.
+
+ Ask for explicit user confirmation before placing the order.
+
+ Args:
+ account_id: Trading account id.
+ symbol: Ticker symbol such as 'AAPL'.
+ quantity: Number of shares to buy.
+
+ Returns:
+ A JSON STRING order object.
+ """
+ db = get_db(session_id)
+ account = _require_account(db, account_id)
+ sym = _normalize_symbol(symbol, db)
+ qty = _ensure_quantity(quantity)
+ price = _market_price(db, sym)
+
+ _apply_buy_fill(account, sym, qty, price)
+ order = _create_order(
+ db,
+ account,
+ symbol=sym,
+ side="buy",
+ order_type="market",
+ quantity=qty,
+ limit_price=None,
+ status="filled",
+ executed_price=price,
+ )
+
+ _save_db(db)
+ return json.dumps(order, indent=2)
+
+
+@mcp.tool()
+def place_market_sell(account_id: str, symbol: str, quantity: int, session_id: str = "") -> str:
+ """Place a market sell order and execute immediately at current price.
+
+ Ask for explicit user confirmation before placing the order.
+
+ Args:
+ account_id: Trading account id.
+ symbol: Ticker symbol such as 'AAPL'.
+ quantity: Number of shares to sell.
+
+ Returns:
+ A JSON STRING order object.
+ """
+ db = get_db(session_id)
+ account = _require_account(db, account_id)
+ sym = _normalize_symbol(symbol, db)
+ qty = _ensure_quantity(quantity)
+ price = _market_price(db, sym)
+
+ _apply_sell_fill(account, sym, qty, price)
+ order = _create_order(
+ db,
+ account,
+ symbol=sym,
+ side="sell",
+ order_type="market",
+ quantity=qty,
+ limit_price=None,
+ status="filled",
+ executed_price=price,
+ )
+
+ _save_db(db)
+ return json.dumps(order, indent=2)
+
+
+@mcp.tool()
+def place_limit_buy(
+ account_id: str,
+ symbol: str,
+ quantity: int,
+ limit_price: float,
+ session_id: str = "",
+) -> str:
+ """Place a limit buy order.
+
+ If current price <= limit_price, order is filled immediately; otherwise stays open.
+ Ask for explicit user confirmation before placing the order.
+
+ Returns:
+ A JSON STRING order object.
+ """
+ db = get_db(session_id)
+ account = _require_account(db, account_id)
+ sym = _normalize_symbol(symbol, db)
+ qty = _ensure_quantity(quantity)
+ limit_px = _ensure_limit_price(limit_price)
+ current = _market_price(db, sym)
+
+ status = "open"
+ executed_price: Optional[float] = None
+ if current <= limit_px:
+ _apply_buy_fill(account, sym, qty, current)
+ status = "filled"
+ executed_price = current
+
+ order = _create_order(
+ db,
+ account,
+ symbol=sym,
+ side="buy",
+ order_type="limit",
+ quantity=qty,
+ limit_price=limit_px,
+ status=status,
+ executed_price=executed_price,
+ )
+
+ _save_db(db)
+ return json.dumps(order, indent=2)
+
+
+@mcp.tool()
+def place_limit_sell(
+ account_id: str,
+ symbol: str,
+ quantity: int,
+ limit_price: float,
+ session_id: str = "",
+) -> str:
+ """Place a limit sell order.
+
+ If current price >= limit_price, order is filled immediately; otherwise stays open.
+ Ask for explicit user confirmation before placing the order.
+
+ Returns:
+ A JSON STRING order object.
+ """
+ db = get_db(session_id)
+ account = _require_account(db, account_id)
+ sym = _normalize_symbol(symbol, db)
+ qty = _ensure_quantity(quantity)
+ limit_px = _ensure_limit_price(limit_price)
+ current = _market_price(db, sym)
+
+ positions = account.setdefault("positions", {})
+ existing = positions.get(sym)
+ if not existing or int(existing.get("quantity", 0)) < qty:
+ raise ValueError("Insufficient shares to place sell order")
+
+ status = "open"
+ executed_price: Optional[float] = None
+ if current >= limit_px:
+ _apply_sell_fill(account, sym, qty, current)
+ status = "filled"
+ executed_price = current
+
+ order = _create_order(
+ db,
+ account,
+ symbol=sym,
+ side="sell",
+ order_type="limit",
+ quantity=qty,
+ limit_price=limit_px,
+ status=status,
+ executed_price=executed_price,
+ )
+
+ _save_db(db)
+ return json.dumps(order, indent=2)
+
+
+@mcp.tool()
+def cancel_open_order(account_id: str, order_id: str, session_id: str = "") -> str:
+ """Cancel an open order.
+
+ Ask for explicit user confirmation before cancellation.
+
+ Args:
+ account_id: Trading account id.
+ order_id: Order ID such as 'ORD-000004'.
+
+ Returns:
+ A JSON STRING order object.
+ """
+ db = get_db(session_id)
+ _require_account(db, account_id)
+
+ order = db.get("orders", {}).get(order_id)
+ if not order:
+ raise ValueError("Order not found")
+ if order.get("account_id") != account_id:
+ raise ValueError("Order does not belong to account")
+ if order.get("status") != "open":
+ raise ValueError("Only open orders can be cancelled")
+
+ order["status"] = "cancelled"
+ db.setdefault("meta", {})["updated_at"] = _now_iso()
+ _save_db(db)
+ return json.dumps(order, indent=2)
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser(description="Stocks MCP Server")
+ parser.add_argument(
+ "--db-path",
+ default=DEFAULT_DB_PATH,
+ help="Path to the stocks database JSON file",
+ )
+ parser.add_argument(
+ "--port",
+ type=int,
+ default=5050,
+ help="Port to run the SSE server on",
+ )
+ parser.add_argument(
+ "--host",
+ default="0.0.0.0",
+ help="Host to bind to",
+ )
+ parser.add_argument(
+ "--transport",
+ choices=["sse", "stdio"],
+ default="sse",
+ help="Transport protocol to use",
+ )
+
+ args = parser.parse_args()
+ os.environ["STOCKS_DB_PATH"] = args.db_path
+
+ ensure_db(args.db_path)
+ get_db()
+ print(" Original DB file is READ-ONLY (per-session copies used for mutations)")
+ print(f" Session DB dir: {SESSION_DB_DIR}")
+
+ print("\n🚀 Starting Stocks MCP Server...")
+ print(f" Transport: {args.transport}")
+ if args.transport == "sse":
+ print(f" Host: {args.host}")
+ print(f" Port: {args.port}")
+ print(f" SSE endpoint: http://{args.host}:{args.port}/sse")
+
+ mcp.run(transport=args.transport, host=args.host, port=args.port)
diff --git a/sample_solutions/AgenticCodeExecution/examples/stocks/stocks-system-prompt.txt b/sample_solutions/AgenticCodeExecution/examples/stocks/stocks-system-prompt.txt
new file mode 100644
index 00000000..50d37570
--- /dev/null
+++ b/sample_solutions/AgenticCodeExecution/examples/stocks/stocks-system-prompt.txt
@@ -0,0 +1,352 @@
+
+You are a customer service agent that helps the user according to the policy provided below.
+
+============================================================
+SECTION 1 — GENERIC EXECUTE_PYTHON INSTRUCTIONS
+============================================================
+
+HOW YOU WORK:
+You have ONE tool: execute_python. It runs Python code in a sandbox.
+Inside that sandbox, an `actions` object is pre-loaded with methods you call to perform operations (look up records, search options, modify state, etc.).
+The full list of available actions and their parameters is documented in the execute_python tool description — READ IT before writing code.
+
+TURN STRUCTURE (STRICT):
+In each turn you must do EXACTLY ONE of the following — never both:
+ A) Send a text message to the user, OR
+ B) Make an execute_python call.
+You MUST NOT combine a message and a tool call in the same turn.
+In particular, if execute_python returned an error, your next turn must be ONLY an execute_python call with fixed code — no text, no apology, no explanation.
+
+CRITICAL — WRITE COMPLETE SCRIPTS:
+A single execute_python call can contain MANY actions.* calls chained together in one script.
+You MUST combine all related steps into ONE execute_python call.
+Do NOT make separate execute_python calls for each individual action.
+
+Think about what information you need, then write ONE script that gathers and processes ALL of it.
+Only make a second execute_python call if the first one fails or if you need user input before continuing.
+
+SANDBOX ENVIRONMENT:
+Your code runs in a restricted Python sandbox. These constraints apply:
+- ALLOWED IMPORTS: json, math, datetime, time, re, collections, itertools — nothing else
+- NO file I/O: open(), os, sys, subprocess, pathlib are all blocked
+- NO exec(), eval(), compile()
+- NO dunder attributes: __name__, __class__, __dict__ etc. are blocked
+- NO input(): extract information from the conversation instead
+- The `actions` object is ALREADY AVAILABLE — do not import it
+
+CRITICAL INPUT BAN (HIGHEST PRIORITY):
+- NEVER use input() in execute_python code.
+- NEVER use placeholder variables such as `input`, `Input`, or `user_input`.
+- NEVER write code like `email = input(...)` or `value = Input`.
+- If any value is missing, ask the user in a normal assistant message (not in Python code).
+- In execute_python code, only use values already present in conversation/tool outputs.
+- Any script using input() is invalid and must be rewritten before execution.
+
+CODE GENERATION RULES:
+
+1. Treat actions.* outputs by TYPE:
+ - If output is structured data (object/list), parse with json.loads() before field access.
+ - If output is a scalar (e.g., identifier/status string), use it directly.
+
+ Safe pattern:
+ import json
+ raw = actions.ANY_METHOD(args)
+ # parse when raw looks like structured JSON; otherwise use raw directly
+ data = json.loads(raw) if isinstance(raw, str) and raw.lstrip().startswith('[') else raw
+ # for JSON objects, use the same idea: parse when output is structured JSON text
+
+ ❌ WRONG — accessing fields on the raw JSON string:
+ result = actions.some_write_method(...)
+ print(result['status']) # CRASH: result is a STRING, not a dict
+
+ ✅ CORRECT — parse first, then access:
+ result = json.loads(actions.some_write_method(...))
+ print(result['status'])
+
+2. ALWAYS print results — print() is the ONLY way to see output:
+ print(data)
+
+3. DICT vs LIST — read the signature line for each action:
+ Many actions return DICTS (not lists). Iterating a dict gives you KEYS (strings), not objects.
+
+ ❌ WRONG — treats dict as list of objects:
+ for item in data:
+ print(item['name']) # CRASH: item is a string key, not a dict
+
+ ✅ CORRECT — use .items() for dicts:
+ for key, value in data.items():
+ print(key, value)
+
+ When unsure, print the data first: print(type(data), data)
+
+4. STATELESS: Variables do NOT persist between execute_python calls.
+ Put ALL steps in ONE script.
+
+5. NEVER fabricate identifiers or option values.
+ Extract concrete values from tool outputs and reuse them exactly.
+ Never pass placeholder tokens like "user_id", "order_id", "item_id", "payment_method_id"
+ as actual values. Those are parameter NAMES, not real values.
+
+6. PRE-FLIGHT CHECKLIST before any state-changing action:
+ Before calling any write action, verify all required arguments come from current
+ data in THIS script and satisfy preconditions.
+
+ Generic checks:
+ - Every argument variable is defined before use (no undefined names).
+ - No input()/Input/user_input usage anywhere in the script.
+ - Entities referenced by the action are confirmed to exist in retrieved data.
+ - Current state allows the action (e.g., status/eligibility/business constraints).
+ - Argument values are concrete runtime values (not placeholder strings).
+
+ Pattern:
+ # 1) Read current state
+ state = json.loads(actions.some_read_method(...))
+ print(state)
+
+ # 2) Validate preconditions and required values
+ # (extract concrete values from state; do not invent placeholders)
+
+ # 3) Execute write action only after checks
+ result = json.loads(actions.some_write_method(...))
+ print(result)
+
+7. LOOKUP + EXECUTION GUARDRAILS (GENERIC):
+ A) LOOKUP FALLBACK (max 2 tries):
+ - Try one primary lookup strategy.
+ - If it fails, try one alternative strategy.
+ - If both fail, DO NOT guess values and DO NOT run write actions.
+ Ask the user for the missing value(s) in a normal assistant message.
+
+ B) NEVER USE PLACEHOLDER LITERALS AS REAL VALUES:
+ Parameter names are not runtime values.
+ Forbidden as actual values: "user_id", "order_id", "item_id", "product_id",
+ "payment_method_id", "email", "first_name", "last_name", "zip", etc.
+
+ C) READ-THEN-WRITE DISCIPLINE:
+ Before any write action, explicitly read current state and confirm:
+ - target entity exists
+ - operation is allowed in current state
+ - selected replacement/option/value is valid in current data
+
+ D) ARGUMENT SANITY CHECKS BEFORE WRITE:
+ For each required argument:
+ - it is defined in this script
+ - it is non-empty and concrete (not placeholder text)
+ - it was derived from conversation/tool output
+ Print chosen argument values before the write call.
+
+ E) TWO-PHASE SCRIPT TEMPLATE:
+ - Phase 1: gather and print all required facts
+ - Phase 2: execute write action only if checks pass
+
+8. MANDATORY PRE-EXECUTION SELF-CHECK (RUN THIS MENTALLY BEFORE EVERY execute_python CALL):
+ - No `input()`, `input`, `Input`, or `user_input` anywhere in code.
+ - No placeholder literals used as real values (`"user_id"`, `"order_id"`, etc.).
+ - Every variable used in arguments is defined in THIS script.
+ - For every critical value, print it before write actions.
+ - If a lookup failed twice, DO NOT continue with write actions; ask user for corrected value.
+
+9. STRICT LOOKUP FAILURE RULE:
+ If authentication/lookup fails (target entity not found), you MUST stop mutation flow.
+ Do not call write actions after unresolved lookup failure.
+ Your next action is either:
+ - one alternative lookup attempt, OR
+ - ask the user for corrected identifying information.
+
+SILENT ERROR RECOVERY (MANDATORY):
+When execute_python returns an error, you MUST:
+1. NOT produce any text message — no apology, no explanation, no "let me try again"
+2. Immediately make another execute_python call with corrected code
+3. The user must NEVER know an error occurred
+
+This means your very next turn after an error is ONLY a tool call — zero words to the user.
+
+❌ WRONG: "I apologize for the error. Let me try a different approach." + execute_python
+❌ WRONG: "Let me fix that." + execute_python
+❌ WRONG: Any text at all before or alongside the retry
+✅ CORRECT: execute_python (with fixed code, no accompanying text)
+
+Only speak to the user when you have a successful result or need information from them.
+If after 3 retries you still cannot complete the task, you may say there is a temporary technical issue — but never mention implementation details.
+
+ANTI-LOOP GUARD (GENERIC):
+- If you hit the same error class twice in a row (e.g., repeated "not found" or repeated type/index error),
+ stop retrying variations and switch strategy:
+ 1) one alternative lookup/validation path, then
+ 2) ask user for the missing/corrected value if still unresolved.
+- Do not burn steps by repeating near-identical failing scripts.
+
+============================================================
+SECTION 2 — USE-CASE SPECIFIC EXAMPLES (STOCKS)
+============================================================
+
+CRITICAL — WRITE COMPLETE SCRIPTS EXAMPLE:
+
+❌ WRONG (multiple execute_python calls, one action each):
+ Turn 1: execute_python → actions.find_account_id_by_email("jane@example.com")
+ Turn 2: execute_python → actions.get_account_summary("acc_jane_001")
+ Turn 3: execute_python → actions.place_market_buy("acc_jane_001", "AAPL", 5)
+
+✅ CORRECT (one execute_python call with a complete script):
+ execute_python →
+ import json
+ account_id = actions.find_account_id_by_email("jane.miller@example.com")
+ print(f"Account ID: {account_id}")
+ account = json.loads(actions.get_account_summary(account_id))
+ print(f"Cash: {account['cash_balance']}")
+ quote = json.loads(actions.get_quote("AAPL"))
+ print(f"AAPL price: {quote['current_price']}")
+
+STOCKS STATE TRUTH RULE (MANDATORY):
+- Never report balances, positions, order status, or order history from memory.
+- Every factual state claim must come from tool output retrieved in the SAME execute_python script.
+- Before replying with account/order state, re-read from DB using:
+ `get_account_summary`, `get_portfolio`, and/or `get_order_history`.
+
+STOCKS IDENTIFIER DISCIPLINE:
+- Never invent account IDs, order IDs, symbols, prices, or quantities.
+- Account actions must start with account identification (usually by email).
+- Trade symbols must come from user request + `list_available_symbols()` / `get_quote()` validation.
+- For cancel requests, verify order ownership and current order status before mutation.
+
+STOCKS API NAME DISCIPLINE (MANDATORY):
+- Use only stocks actions listed in the current API REFERENCE.
+- Allowed account + market read methods:
+ `actions.find_account_id_by_email`, `actions.get_account_summary`, `actions.get_portfolio`,
+ `actions.get_quote`, `actions.list_available_symbols`, `actions.list_market_movers`,
+ `actions.get_order_history`.
+- Allowed trading/write methods:
+ `actions.place_market_buy`, `actions.place_market_sell`,
+ `actions.place_limit_buy`, `actions.place_limit_sell`, `actions.cancel_open_order`.
+- Allowed utility methods:
+ `actions.calculate`, `actions.transfer_to_human_agents`.
+- If a method name is not in this stocks allowlist or current stocks API REFERENCE, do not call it.
+
+FIRST AUTH TURN TEMPLATE (STOCKS):
+- After user provides email, the first auth script must use:
+ 1) `account_id = actions.find_account_id_by_email(email)`
+ 2) `account = json.loads(actions.get_account_summary(account_id))`
+ 3) `portfolio = json.loads(actions.get_portfolio(account_id))`
+ 4) print all three outputs
+- Do not use any `user_id` variable in stocks workflows.
+
+AUTHENTICATION GATE (MANDATORY):
+- Never claim the user is authenticated before BOTH conditions are true:
+ 1) user has explicitly provided an email in conversation, and
+ 2) `find_account_id_by_email(email)` succeeded in execute_python.
+- If email is missing, ask for email and do NOT run account/trading tools.
+- If lookup fails, do NOT claim success; ask for corrected email.
+- Do not mention account name, balances, holdings, or order history until auth succeeds.
+- In the first successful auth response, include the exact authenticated `account_id` from tool output.
+
+MUTATION EXECUTION CONTRACT (MANDATORY):
+- For mutation intents (`place_market_buy`, `place_market_sell`, `place_limit_buy`,
+ `place_limit_sell`, `cancel_open_order`):
+ 1) first ask for confirmation,
+ 2) after user confirms ("yes"), your NEXT turn must be an `execute_python` call that performs the mutation.
+- Do not send a text-only "success" message before that tool call occurs.
+- Never claim an order was placed/cancelled unless tool output in that turn contains concrete mutation evidence
+ (for example `order_id`, `status`, and order details).
+- If the tool call fails, do not pretend success; follow retry/reconciliation rules.
+
+STOCKS TRADE PRECHECKS (MANDATORY):
+Before any buy/sell/cancel call, gather and print:
+1) account_id and current cash balance / holdings
+2) symbol and live quote used for decision
+3) quantity and order type parameters
+4) for limits: selected limit price and relation to current market price
+
+If any required value is missing or invalid, ask the user rather than guessing.
+
+POST-WRITE VERIFICATION (MANDATORY):
+- After any mutation (`place_*` / `cancel_open_order`), in the SAME execute_python script:
+ 1) print the mutation result,
+ 2) re-read order history and print the affected order ID + status,
+ 3) re-read account summary and portfolio and print updated cash/position values.
+- Do not claim success unless these verification reads match the claim.
+
+MISMATCH RECONCILIATION RULE:
+- If user says the result is wrong/missing, do not apologize repeatedly and do not guess.
+- Run one reconciliation script that re-fetches:
+ `get_account_summary`, `get_portfolio`, and `get_order_history`.
+- Report exactly what is in DB now.
+- If mismatch persists after one reconciliation attempt, perform one corrective write attempt if valid.
+- Transfer to human only if corrective attempt is impossible or fails with a concrete tool limitation.
+
+TRANSFER GATE (STRICT):
+- Do NOT transfer solely due to temporary execution errors or uncertainty.
+- Transfer only when:
+ 1) request is out of policy, or
+ 2) required capability is unavailable, confirmed by concrete tool error after retries.
+
+TRANSFER TO HUMAN AGENT:
+To transfer, make an execute_python call with code: actions.transfer_to_human_agents("summary of the issue"), then send the message 'YOU ARE BEING TRANSFERRED TO A HUMAN AGENT. PLEASE HOLD ON.' to the user.
+
+
+# Stocks trading agent policy
+
+As a stocks trading support agent, you can help users:
+
+- authenticate trading account via email lookup
+- review account profile, cash balance, holdings, quotes, and order history
+- place market and limit buy/sell orders
+- cancel open orders
+- provide neutral factual market data from available tools
+
+At the beginning of the conversation, authenticate the user account before any trade action.
+
+Authentication is valid only when BOTH are true:
+- the user explicitly provided an email in conversation, and
+- a tool call to `find_account_id_by_email` succeeds.
+
+You must never claim authentication success before those two conditions are met.
+If email is missing, ask for email first.
+If lookup fails, ask for corrected email and do not proceed to account/trading operations.
+
+You can only operate on one authenticated account per conversation and must deny requests to access or trade another account.
+
+Before taking any action that updates the database (place/cancel orders), list action details and obtain explicit user confirmation (yes) to proceed.
+
+After user confirmation for a write action, you must execute the write via tool call before any success statement.
+Do not provide text-only completion claims for place/cancel operations.
+
+All account/order facts you present (cash, positions, order status/history) must come from current tool output, not memory.
+Before presenting state to the user, re-read the relevant data from tools in the same execution flow.
+
+Do not provide financial advice, predictions, or recommendations. Only provide factual tool-backed information.
+
+Do not fabricate symbols, account ids, order ids, prices, quantities, balances, or execution outcomes.
+
+After each successful mutation (place/cancel order), you must verify by re-reading:
+- order history (including affected order id + status), and
+- account summary / portfolio (cash + position updates).
+
+Do not claim success unless verification output matches the claim.
+
+Any claimed order id, status, balance, or position update must be directly traceable to current tool output.
+
+You should at most make one tool call at a time, and if you take a tool call, you should not respond to the user at the same time. If you respond to the user, you should not make a tool call at the same time.
+
+You should deny requests that violate policy or require unavailable capabilities.
+
+Transfer to a human agent if and only if the request cannot be handled with available actions.
+Do not transfer solely due to uncertainty or temporary execution issues.
+Transfer only for:
+- out-of-policy requests, or
+- confirmed tool capability limitations after retry/reconciliation.
+
+If user reports a mismatch, run one reconciliation by re-reading account summary, portfolio, and order history and report what the DB currently shows.
+Only after reconciliation and (if applicable) one valid corrective attempt may you transfer.
+
+To transfer, first call transfer_to_human_agents, then send: 'YOU ARE BEING TRANSFERRED TO A HUMAN AGENT. PLEASE HOLD ON.'
+
+## Domain basics
+
+- All prices are in USD.
+- Market quote values are demo values from the local tools database.
+- A market order executes immediately at the current quote price.
+- A limit order is filled immediately only when its condition is met; otherwise it remains open.
+- Cancelling is allowed only for open orders.
+- Positions track quantity and average cost per symbol.
+- Cash balance and positions are updated only by successful trade mutations.
+
diff --git a/sample_solutions/AgenticCodeExecution/examples/stocks/stocks_data_model.py b/sample_solutions/AgenticCodeExecution/examples/stocks/stocks_data_model.py
new file mode 100644
index 00000000..8c7cc0cf
--- /dev/null
+++ b/sample_solutions/AgenticCodeExecution/examples/stocks/stocks_data_model.py
@@ -0,0 +1,115 @@
+"""Data models for the stocks domain."""
+
+from __future__ import annotations
+
+import json
+from pathlib import Path
+from typing import Any, Dict, Literal, Optional
+
+from pydantic import BaseModel, Field
+
+
+OrderSide = Literal["buy", "sell"]
+OrderType = Literal["market", "limit"]
+OrderStatus = Literal["open", "filled", "cancelled"]
+
+
+class Position(BaseModel):
+ """Current holdings for a stock symbol."""
+
+ quantity: int = Field(description="Number of shares held")
+ avg_cost: float = Field(description="Average cost basis per share")
+
+
+class Account(BaseModel):
+ """Trading account profile and portfolio snapshot."""
+
+ account_id: str = Field(description="Unique account identifier")
+ name: str = Field(description="Account holder name")
+ email: str = Field(description="Account holder email")
+ cash_balance: float = Field(description="Available cash balance")
+ positions: Dict[str, Position] = Field(
+ description="Positions keyed by stock symbol"
+ )
+ watchlist: list[str] = Field(description="Saved watchlist symbols")
+ order_ids: list[str] = Field(description="Order IDs associated with this account")
+
+
+class Quote(BaseModel):
+ """Current market quote for a tradable symbol."""
+
+ symbol: str = Field(description="Ticker symbol")
+ name: str = Field(description="Company name")
+ sector: str = Field(description="Market sector")
+ current_price: float = Field(description="Latest traded price")
+ day_open: float = Field(description="Opening price for the day")
+ day_high: float = Field(description="Highest price for the day")
+ day_low: float = Field(description="Lowest price for the day")
+ volume: int = Field(description="Trading volume for the day")
+
+
+class Order(BaseModel):
+ """Trade order record."""
+
+ order_id: str = Field(description="Unique order identifier")
+ account_id: str = Field(description="Account that placed the order")
+ symbol: str = Field(description="Ticker symbol")
+ side: OrderSide = Field(description="Buy or sell direction")
+ order_type: OrderType = Field(description="Order execution type")
+ quantity: int = Field(description="Number of shares")
+ limit_price: Optional[float] = Field(
+ default=None,
+ description="Limit price for limit orders",
+ )
+ status: OrderStatus = Field(description="Current order status")
+ executed_price: Optional[float] = Field(
+ default=None,
+ description="Execution price when filled",
+ )
+ created_at: str = Field(description="Order creation timestamp in ISO format")
+ filled_at: Optional[str] = Field(
+ default=None,
+ description="Fill timestamp in ISO format",
+ )
+
+
+class StocksDB(BaseModel):
+ """Database containing stocks accounts, quotes, and orders."""
+
+ model_config = {"extra": "allow"}
+
+ accounts: Dict[str, Account] = Field(
+ description="Dictionary of accounts indexed by account_id"
+ )
+ market: Dict[str, Quote] = Field(
+ description="Dictionary of quotes indexed by ticker symbol"
+ )
+ orders: Dict[str, Order] = Field(
+ description="Dictionary of orders indexed by order_id"
+ )
+ meta: Dict[str, Any] = Field(default_factory=dict, description="Metadata section")
+
+ _db_path: str = ""
+
+ @classmethod
+ def load(cls, path: str | Path) -> "StocksDB":
+ """Load the database from a JSON file."""
+ with open(path, "r", encoding="utf-8") as handle:
+ data = json.load(handle)
+ db = cls.model_validate(data)
+ db._db_path = str(path)
+ return db
+
+ def save(self) -> None:
+ """Save the database back to the JSON file."""
+ if self._db_path:
+ with open(self._db_path, "w", encoding="utf-8") as handle:
+ json.dump(self.model_dump(exclude={"_db_path"}, mode="json"), handle, indent=2)
+
+ def get_statistics(self) -> Dict[str, Any]:
+ """Get high-level statistics for the stocks database."""
+ return {
+ "num_accounts": len(self.accounts),
+ "num_symbols": len(self.market),
+ "num_orders": len(self.orders),
+ }
diff --git a/sample_solutions/AgenticCodeExecution/examples/triage/data/.gitkeep b/sample_solutions/AgenticCodeExecution/examples/triage/data/.gitkeep
new file mode 100644
index 00000000..e69de29b
diff --git a/sample_solutions/AgenticCodeExecution/examples/triage/mcp_triage_server.py b/sample_solutions/AgenticCodeExecution/examples/triage/mcp_triage_server.py
new file mode 100644
index 00000000..974a93b3
--- /dev/null
+++ b/sample_solutions/AgenticCodeExecution/examples/triage/mcp_triage_server.py
@@ -0,0 +1,618 @@
+#!/usr/bin/env python3
+"""
+MCP Server for Incident Triage Tools - Fully Standalone
+
+This domain is intentionally non-DB-centric and focused on chainable operational checks.
+"""
+
+import argparse
+import json
+import socket
+import ssl
+import sys
+import time
+import urllib.error
+import urllib.parse
+import urllib.request
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any, Dict, List
+
+from fastmcp import FastMCP
+
+# Add parent directory to sys.path for shared modules (error_hints)
+sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
+
+from error_hints import analyze_execution_error
+
+
+mcp = FastMCP(
+ "Incident Triage Tools Server",
+ instructions="""You are an incident triage support agent. Use these tools to help users with:
+- Checking HTTP endpoint health and latency
+- Resolving DNS and testing TCP connectivity
+- Inspecting TLS certificate metadata
+- Checking public vendor status pages
+- Building structured triage summaries and customer updates
+
+Always gather evidence first, then summarize impact, likely cause, and next steps.""",
+)
+
+
+_STATUS_APIS = {
+ "github": "https://www.githubstatus.com/api/v2/status.json",
+ "openai": "https://status.openai.com/api/v2/status.json",
+ "cloudflare": "https://www.cloudflarestatus.com/api/v2/status.json",
+ "slack": "https://status.slack.com/api/v2.0.0/current",
+}
+
+
+def _now_iso() -> str:
+ return datetime.now(timezone.utc).replace(microsecond=0).isoformat().replace("+00:00", "Z")
+
+
+def _http_get_json(url: str, timeout_sec: int = 8) -> Dict[str, Any]:
+ request = urllib.request.Request(
+ url,
+ headers={
+ "User-Agent": "mcp-triage-server/1.0",
+ "Accept": "application/json",
+ },
+ )
+ with urllib.request.urlopen(request, timeout=timeout_sec) as response:
+ body = response.read().decode("utf-8", errors="replace")
+ return json.loads(body)
+
+
+def _safe_json_loads(raw: str, default: Any) -> Any:
+ try:
+ return json.loads(raw)
+ except Exception:
+ return default
+
+
+def _severity_rank(severity: str) -> int:
+ order = {"critical": 4, "high": 3, "medium": 2, "low": 1}
+ return order.get((severity or "").strip().lower(), 0)
+
+
+def _infer_severity(http_ok: bool, tcp_ok: bool, vendor_indicator: str, error_text: str) -> str:
+ text = (error_text or "").lower()
+ if (not tcp_ok and not http_ok) or "outage" in vendor_indicator.lower() or "critical" in text:
+ return "critical"
+ if (not http_ok) or "major" in vendor_indicator.lower() or "timeout" in text:
+ return "high"
+ if "degraded" in vendor_indicator.lower() or "latency" in text or "error" in text:
+ return "medium"
+ return "low"
+
+
+@mcp.tool()
+def check_http_endpoint(url: str, timeout_sec: int = 8, session_id: str = "") -> str:
+ """Check HTTP endpoint health and latency.
+
+ Args:
+ url: Endpoint URL, such as 'https://api.example.com/health'.
+ timeout_sec: Timeout in seconds.
+
+ Returns:
+ A JSON STRING with status_code, latency_ms, ok flag, and response snippet.
+ """
+ _ = session_id
+ start = time.perf_counter()
+ request = urllib.request.Request(url, headers={"User-Agent": "mcp-triage-server/1.0"})
+
+ result: Dict[str, Any] = {
+ "url": url,
+ "checked_at": _now_iso(),
+ "ok": False,
+ "status_code": None,
+ "latency_ms": None,
+ "error": None,
+ "response_excerpt": "",
+ }
+
+ try:
+ with urllib.request.urlopen(request, timeout=timeout_sec) as response:
+ elapsed_ms = round((time.perf_counter() - start) * 1000, 2)
+ body = response.read(400).decode("utf-8", errors="replace")
+ result.update(
+ {
+ "ok": 200 <= int(response.status) < 400,
+ "status_code": int(response.status),
+ "latency_ms": elapsed_ms,
+ "response_excerpt": body,
+ }
+ )
+ except urllib.error.HTTPError as exc:
+ elapsed_ms = round((time.perf_counter() - start) * 1000, 2)
+ body = exc.read(400).decode("utf-8", errors="replace") if exc.fp else ""
+ result.update(
+ {
+ "ok": False,
+ "status_code": int(exc.code),
+ "latency_ms": elapsed_ms,
+ "error": str(exc),
+ "response_excerpt": body,
+ }
+ )
+ except Exception as exc:
+ elapsed_ms = round((time.perf_counter() - start) * 1000, 2)
+ result.update({"latency_ms": elapsed_ms, "error": str(exc)})
+
+ return json.dumps(result, indent=2)
+
+
+@mcp.tool()
+def run_latency_probe(url: str, attempts: int = 3, timeout_sec: int = 6, session_id: str = "") -> str:
+ """Run repeated HTTP checks to estimate latency stability.
+
+ Args:
+ url: Endpoint URL.
+ attempts: Number of probe attempts (1-10).
+ timeout_sec: Timeout per request.
+
+ Returns:
+ A JSON STRING with per-attempt latencies and summary stats.
+ """
+ _ = session_id
+ attempts = max(1, min(10, int(attempts)))
+ samples: List[float] = []
+ statuses: List[int] = []
+ errors: List[str] = []
+
+ for _idx in range(attempts):
+ parsed = _safe_json_loads(check_http_endpoint(url=url, timeout_sec=timeout_sec), {})
+ latency = parsed.get("latency_ms")
+ if isinstance(latency, (int, float)):
+ samples.append(float(latency))
+ status_code = parsed.get("status_code")
+ if isinstance(status_code, int):
+ statuses.append(status_code)
+ if parsed.get("error"):
+ errors.append(str(parsed.get("error")))
+
+ summary = {
+ "url": url,
+ "attempts": attempts,
+ "latencies_ms": samples,
+ "min_ms": min(samples) if samples else None,
+ "max_ms": max(samples) if samples else None,
+ "avg_ms": round(sum(samples) / len(samples), 2) if samples else None,
+ "status_codes": statuses,
+ "errors": errors,
+ "checked_at": _now_iso(),
+ }
+ return json.dumps(summary, indent=2)
+
+
+@mcp.tool()
+def resolve_hostname(hostname: str, session_id: str = "") -> str:
+ """Resolve hostname to IPv4/IPv6 addresses.
+
+ Args:
+ hostname: Hostname such as 'api.example.com'.
+
+ Returns:
+ A JSON STRING with resolved addresses.
+ """
+ _ = session_id
+ records = socket.getaddrinfo(hostname, None)
+ addresses = sorted({rec[4][0] for rec in records})
+ return json.dumps(
+ {
+ "hostname": hostname,
+ "resolved_addresses": addresses,
+ "count": len(addresses),
+ "checked_at": _now_iso(),
+ },
+ indent=2,
+ )
+
+
+@mcp.tool()
+def check_tcp_port(host: str, port: int, timeout_sec: int = 4, session_id: str = "") -> str:
+ """Check TCP connectivity to a host and port.
+
+ Args:
+ host: Target hostname or IP.
+ port: TCP port.
+ timeout_sec: Timeout in seconds.
+
+ Returns:
+ A JSON STRING with connectivity result and latency.
+ """
+ _ = session_id
+ start = time.perf_counter()
+ result = {
+ "host": host,
+ "port": int(port),
+ "ok": False,
+ "latency_ms": None,
+ "error": None,
+ "checked_at": _now_iso(),
+ }
+
+ try:
+ with socket.create_connection((host, int(port)), timeout=timeout_sec):
+ result["ok"] = True
+ result["latency_ms"] = round((time.perf_counter() - start) * 1000, 2)
+ except Exception as exc:
+ result["latency_ms"] = round((time.perf_counter() - start) * 1000, 2)
+ result["error"] = str(exc)
+
+ return json.dumps(result, indent=2)
+
+
+@mcp.tool()
+def inspect_tls_certificate(host: str, port: int = 443, timeout_sec: int = 5, session_id: str = "") -> str:
+ """Inspect basic TLS certificate metadata for a host.
+
+ Args:
+ host: Target hostname.
+ port: TLS port (default 443).
+ timeout_sec: Timeout in seconds.
+
+ Returns:
+ A JSON STRING with cert subject, issuer, and validity dates.
+ """
+ _ = session_id
+ context = ssl.create_default_context()
+ with socket.create_connection((host, int(port)), timeout=timeout_sec) as sock:
+ with context.wrap_socket(sock, server_hostname=host) as tls_sock:
+ cert = tls_sock.getpeercert()
+
+ def _flatten_name(name_items: List[Any]) -> Dict[str, str]:
+ flattened: Dict[str, str] = {}
+ for item in name_items:
+ for key, value in item:
+ flattened[key] = value
+ return flattened
+
+ data = {
+ "host": host,
+ "port": int(port),
+ "subject": _flatten_name(cert.get("subject", [])),
+ "issuer": _flatten_name(cert.get("issuer", [])),
+ "not_before": cert.get("notBefore"),
+ "not_after": cert.get("notAfter"),
+ "serial_number": cert.get("serialNumber"),
+ "checked_at": _now_iso(),
+ }
+ return json.dumps(data, indent=2)
+
+
+@mcp.tool()
+def get_public_status(service: str, timeout_sec: int = 8, session_id: str = "") -> str:
+ """Get public status from a known vendor status API.
+
+ Args:
+ service: One of 'github', 'openai', 'cloudflare', 'slack'.
+ timeout_sec: Request timeout in seconds.
+
+ Returns:
+ A JSON STRING with normalized status indicator and description.
+ """
+ _ = session_id
+ key = service.strip().lower()
+ if key not in _STATUS_APIS:
+ raise ValueError("Unsupported service. Choose: github, openai, cloudflare, slack")
+
+ url = _STATUS_APIS[key]
+ payload = _http_get_json(url, timeout_sec=timeout_sec)
+
+ indicator = "unknown"
+ description = ""
+
+ if key == "slack":
+ status = payload.get("status", "").lower()
+ if status in {"active", "ok"}:
+ indicator = "none"
+ description = "All systems operational"
+ elif status:
+ indicator = status
+ description = payload.get("date_updated", "")
+ else:
+ page_status = payload.get("status", {})
+ indicator = page_status.get("indicator", "unknown")
+ description = page_status.get("description", "")
+
+ return json.dumps(
+ {
+ "service": key,
+ "status_api": url,
+ "indicator": indicator,
+ "description": description,
+ "checked_at": _now_iso(),
+ "raw": payload,
+ },
+ indent=2,
+ )
+
+
+@mcp.tool()
+def summarize_incident_signals(
+ service_name: str,
+ http_result_json: str = "",
+ tcp_result_json: str = "",
+ public_status_json: str = "",
+ error_text: str = "",
+ session_id: str = "",
+) -> str:
+ """Summarize triage signals into severity, likely cause, and recommended next steps.
+
+ Args:
+ service_name: Name of affected service.
+ http_result_json: JSON string from check_http_endpoint/run_latency_probe.
+ tcp_result_json: JSON string from check_tcp_port.
+ public_status_json: JSON string from get_public_status.
+ error_text: Additional observed error text/log snippet.
+
+ Returns:
+ A JSON STRING triage summary with severity and recommended actions.
+ """
+ _ = session_id
+ http_data = _safe_json_loads(http_result_json, {})
+ tcp_data = _safe_json_loads(tcp_result_json, {})
+ vendor_data = _safe_json_loads(public_status_json, {})
+
+ http_ok = bool(http_data.get("ok", False))
+ tcp_ok = bool(tcp_data.get("ok", False))
+ vendor_indicator = str(vendor_data.get("indicator", ""))
+
+ severity = _infer_severity(http_ok=http_ok, tcp_ok=tcp_ok, vendor_indicator=vendor_indicator, error_text=error_text)
+
+ likely_causes: List[str] = []
+ if not tcp_ok:
+ likely_causes.append("Network path or service port is unreachable")
+ if tcp_ok and not http_ok:
+ likely_causes.append("Application layer issue (5xx/4xx, timeout, or upstream dependency)")
+ if vendor_indicator and vendor_indicator not in {"none", "unknown", "active", "ok"}:
+ likely_causes.append(f"Upstream provider incident indicated by status API: {vendor_indicator}")
+ if not likely_causes:
+ likely_causes.append("No hard failure detected; investigate intermittent latency or client-side issues")
+
+ recommended_actions = [
+ "Validate blast radius across regions/endpoints",
+ "Compare current error rate vs baseline",
+ "Check recent deployments/config changes",
+ "Notify stakeholders with next update ETA",
+ ]
+
+ if not tcp_ok:
+ recommended_actions.insert(0, "Escalate to network/infrastructure on-call")
+ elif not http_ok:
+ recommended_actions.insert(0, "Inspect application logs and upstream dependency health")
+
+ summary = {
+ "service_name": service_name,
+ "severity": severity,
+ "signals": {
+ "http_ok": http_ok,
+ "http_status_code": http_data.get("status_code"),
+ "tcp_ok": tcp_ok,
+ "vendor_indicator": vendor_indicator,
+ "error_text": error_text,
+ },
+ "likely_causes": likely_causes,
+ "recommended_actions": recommended_actions,
+ "generated_at": _now_iso(),
+ }
+ return json.dumps(summary, indent=2)
+
+
+@mcp.tool()
+def create_incident_report(
+ service_name: str,
+ severity: str,
+ impact_summary: str,
+ findings: str,
+ next_actions: str,
+ session_id: str = "",
+) -> str:
+ """Create a structured incident report object from triage findings.
+
+ Args:
+ service_name: Affected service name.
+ severity: low|medium|high|critical.
+ impact_summary: Human-readable impact statement.
+ findings: JSON or plain-text findings summary.
+ next_actions: JSON array string or plain-text next actions.
+
+ Returns:
+ A JSON STRING incident report with generated incident_id.
+ """
+ _ = session_id
+ sev = (severity or "").strip().lower()
+ if sev not in {"low", "medium", "high", "critical"}:
+ raise ValueError("severity must be one of: low, medium, high, critical")
+
+ findings_obj = _safe_json_loads(findings, findings)
+ actions_obj = _safe_json_loads(next_actions, [next_actions])
+ if isinstance(actions_obj, str):
+ actions_obj = [actions_obj]
+
+ ts = datetime.now(timezone.utc)
+ incident_id = f"INC-{ts.strftime('%Y%m%d-%H%M%S')}"
+
+ report = {
+ "incident_id": incident_id,
+ "service_name": service_name,
+ "severity": sev,
+ "severity_rank": _severity_rank(sev),
+ "status": "investigating",
+ "impact_summary": impact_summary,
+ "findings": findings_obj,
+ "next_actions": actions_obj,
+ "created_at": _now_iso(),
+ }
+ return json.dumps(report, indent=2)
+
+
+@mcp.tool()
+def draft_customer_update(
+ service_name: str,
+ severity: str,
+ impact_summary: str,
+ current_status: str,
+ next_update_eta_minutes: int = 30,
+ session_id: str = "",
+) -> str:
+ """Draft a concise customer-facing status update.
+
+ Args:
+ service_name: Affected service name.
+ severity: low|medium|high|critical.
+ impact_summary: Customer impact summary.
+ current_status: Current mitigation/investigation status.
+ next_update_eta_minutes: ETA for next update.
+
+ Returns:
+ A plain-text status update message.
+ """
+ _ = session_id
+ sev = (severity or "").strip().lower()
+ eta = max(5, int(next_update_eta_minutes))
+
+ message = (
+ f"Incident Update ({service_name})\n"
+ f"Severity: {sev.upper()}\n"
+ f"Impact: {impact_summary}\n"
+ f"Current Status: {current_status}\n"
+ f"Next Update: in approximately {eta} minutes."
+ )
+ return message
+
+
+@mcp.tool()
+def calculate(expression: str, session_id: str = "") -> str:
+ """Calculate the result of a mathematical expression."""
+ _ = session_id
+ if not all(char in "0123456789+-*/(). " for char in expression):
+ raise ValueError("Invalid characters in expression")
+ return str(round(float(eval(expression, {"__builtins__": None}, {})), 6))
+
+
+@mcp.tool()
+def transfer_to_human_agents(summary: str, session_id: str = "") -> str:
+ """Transfer the customer to a human incident commander/on-call engineer."""
+ _ = summary
+ _ = session_id
+ return "Transfer successful"
+
+
+def _get_data_model_defs() -> Dict[str, dict]:
+ return {
+ "HealthCheck": {
+ "description": "HTTP health check result",
+ "properties": {
+ "url": {"type": "string"},
+ "ok": {"type": "boolean"},
+ "status_code": {"type": "integer"},
+ "latency_ms": {"type": "number"},
+ "error": {"type": "string"},
+ },
+ },
+ "IncidentSummary": {
+ "description": "Machine-readable triage summary",
+ "properties": {
+ "service_name": {"type": "string"},
+ "severity": {"type": "string"},
+ "likely_causes": {"type": "array"},
+ "recommended_actions": {"type": "array"},
+ },
+ },
+ "IncidentReport": {
+ "description": "Structured incident ticket/report payload",
+ "properties": {
+ "incident_id": {"type": "string"},
+ "service_name": {"type": "string"},
+ "severity": {"type": "string"},
+ "status": {"type": "string"},
+ "impact_summary": {"type": "string"},
+ "findings": {"type": "object"},
+ "next_actions": {"type": "array"},
+ },
+ },
+ }
+
+
+def _get_tool_metadata_payload() -> Dict[str, Any]:
+ ordered_actions = [
+ "check_http_endpoint",
+ "run_latency_probe",
+ "resolve_hostname",
+ "check_tcp_port",
+ "inspect_tls_certificate",
+ "get_public_status",
+ "summarize_incident_signals",
+ "create_incident_report",
+ "draft_customer_update",
+ "calculate",
+ "transfer_to_human_agents",
+ ]
+
+ return {
+ "ordered_actions": ordered_actions,
+ "return_types": {
+ "check_http_endpoint": "str (JSON)",
+ "run_latency_probe": "str (JSON)",
+ "resolve_hostname": "str (JSON)",
+ "check_tcp_port": "str (JSON)",
+ "inspect_tls_certificate": "str (JSON)",
+ "get_public_status": "str (JSON)",
+ "summarize_incident_signals": "str (JSON)",
+ "create_incident_report": "str (JSON)",
+ "draft_customer_update": "str",
+ "calculate": "str",
+ "transfer_to_human_agents": "str",
+ },
+ "semantic_types": {
+ "check_http_endpoint": "HealthCheck",
+ "run_latency_probe": "dict[latency_stats]",
+ "resolve_hostname": "dict[hostname_resolution]",
+ "check_tcp_port": "dict[tcp_connectivity]",
+ "inspect_tls_certificate": "dict[tls_certificate]",
+ "get_public_status": "dict[provider_status]",
+ "summarize_incident_signals": "IncidentSummary",
+ "create_incident_report": "IncidentReport",
+ },
+ "data_model_defs": _get_data_model_defs(),
+ }
+
+
+@mcp.tool()
+def get_execution_error_hint(error_msg: str, code: str = "", session_id: str = "") -> str:
+ """Return a recovery hint for sandbox execution/tool errors."""
+ _ = session_id
+ return analyze_execution_error(error_msg=error_msg, code=code, domain="triage")
+
+
+@mcp.tool()
+def get_tool_metadata(session_id: str = "") -> str:
+ """Return metadata used to build execute_python action/data-model description."""
+ _ = session_id
+ return json.dumps(_get_tool_metadata_payload())
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser(description="Incident Triage MCP Server")
+ parser.add_argument("--port", type=int, default=5050, help="Port to run the SSE server on")
+ parser.add_argument("--host", default="0.0.0.0", help="Host to bind to")
+ parser.add_argument(
+ "--transport",
+ choices=["sse", "stdio"],
+ default="sse",
+ help="Transport protocol to use",
+ )
+
+ args = parser.parse_args()
+
+ print("\n🚀 Starting Incident Triage MCP Server...")
+ print(f" Transport: {args.transport}")
+ if args.transport == "sse":
+ print(f" Host: {args.host}")
+ print(f" Port: {args.port}")
+ print(f" SSE endpoint: http://{args.host}:{args.port}/sse")
+
+ mcp.run(transport=args.transport, host=args.host, port=args.port)
diff --git a/sample_solutions/AgenticCodeExecution/examples/triage/triage-system-prompt.txt b/sample_solutions/AgenticCodeExecution/examples/triage/triage-system-prompt.txt
new file mode 100644
index 00000000..dc14c3a3
--- /dev/null
+++ b/sample_solutions/AgenticCodeExecution/examples/triage/triage-system-prompt.txt
@@ -0,0 +1,270 @@
+
+You are a customer service agent that helps the user according to the policy provided below.
+
+============================================================
+SECTION 1 — GENERIC EXECUTE_PYTHON INSTRUCTIONS
+============================================================
+
+HOW YOU WORK:
+You have ONE tool: execute_python. It runs Python code in a sandbox.
+Inside that sandbox, an `actions` object is pre-loaded with methods you call to perform operations (look up records, search options, modify state, etc.).
+The full list of available actions and their parameters is documented in the execute_python tool description — READ IT before writing code.
+
+TURN STRUCTURE (STRICT):
+In each turn you must do EXACTLY ONE of the following — never both:
+ A) Send a text message to the user, OR
+ B) Make an execute_python call.
+You MUST NOT combine a message and a tool call in the same turn.
+In particular, if execute_python returned an error, your next turn must be ONLY an execute_python call with fixed code — no text, no apology, no explanation.
+
+CRITICAL — WRITE COMPLETE SCRIPTS:
+A single execute_python call can contain MANY actions.* calls chained together in one script.
+You MUST combine all related steps into ONE execute_python call.
+Do NOT make separate execute_python calls for each individual action.
+
+Think about what information you need, then write ONE script that gathers and processes ALL of it.
+Only make a second execute_python call if the first one fails or if you need user input before continuing.
+
+SANDBOX ENVIRONMENT:
+Your code runs in a restricted Python sandbox. These constraints apply:
+- ALLOWED IMPORTS: json, math, datetime, time, re, collections, itertools — nothing else
+- NO file I/O: open(), os, sys, subprocess, pathlib are all blocked
+- NO exec(), eval(), compile()
+- NO dunder attributes: __name__, __class__, __dict__ etc. are blocked
+- NO input(): extract information from the conversation instead
+- The `actions` object is ALREADY AVAILABLE — do not import it
+
+CRITICAL INPUT BAN (HIGHEST PRIORITY):
+- NEVER use input() in execute_python code.
+- NEVER use placeholder variables such as `input`, `Input`, or `user_input`.
+- NEVER write code like `email = input(...)` or `value = Input`.
+- If any value is missing, ask the user in a normal assistant message (not in Python code).
+- In execute_python code, only use values already present in conversation/tool outputs.
+- Any script using input() is invalid and must be rewritten before execution.
+
+CODE GENERATION RULES:
+
+1. Treat actions.* outputs by TYPE:
+ - If output is structured data (object/list), parse with json.loads() before field access.
+ - If output is a scalar (e.g., identifier/status string), use it directly.
+
+ Safe pattern:
+ import json
+ raw = actions.ANY_METHOD(args)
+ # parse when raw looks like structured JSON; otherwise use raw directly
+ data = json.loads(raw) if isinstance(raw, str) and raw.lstrip().startswith('[') else raw
+ # for JSON objects, use the same idea: parse when output is structured JSON text
+
+ ❌ WRONG — accessing fields on the raw JSON string:
+ result = actions.some_write_method(...)
+ print(result['status']) # CRASH: result is a STRING, not a dict
+
+ ✅ CORRECT — parse first, then access:
+ result = json.loads(actions.some_write_method(...))
+ print(result['status'])
+
+2. ALWAYS print results — print() is the ONLY way to see output:
+ print(data)
+
+3. DICT vs LIST — read the signature line for each action:
+ Many actions return DICTS (not lists). Iterating a dict gives you KEYS (strings), not objects.
+
+ ❌ WRONG — treats dict as list of objects:
+ for item in data:
+ print(item['name']) # CRASH: item is a string key, not a dict
+
+ ✅ CORRECT — use .items() for dicts:
+ for key, value in data.items():
+ print(key, value)
+
+ When unsure, print the data first: print(type(data), data)
+
+4. STATELESS: Variables do NOT persist between execute_python calls.
+ Put ALL steps in ONE script.
+
+5. NEVER fabricate identifiers or option values.
+ Extract concrete values from tool outputs and reuse them exactly.
+ Never pass placeholder tokens like "user_id", "order_id", "item_id", "payment_method_id"
+ as actual values. Those are parameter NAMES, not real values.
+
+6. PRE-FLIGHT CHECKLIST before any state-changing action:
+ Before calling any write action, verify all required arguments come from current
+ data in THIS script and satisfy preconditions.
+
+ Generic checks:
+ - Every argument variable is defined before use (no undefined names).
+ - No input()/Input/user_input usage anywhere in the script.
+ - Entities referenced by the action are confirmed to exist in retrieved data.
+ - Current state allows the action (e.g., status/eligibility/business constraints).
+ - Argument values are concrete runtime values (not placeholder strings).
+
+ Pattern:
+ # 1) Read current state
+ state = json.loads(actions.some_read_method(...))
+ print(state)
+
+ # 2) Validate preconditions and required values
+ # (extract concrete values from state; do not invent placeholders)
+
+ # 3) Execute write action only after checks
+ result = json.loads(actions.some_write_method(...))
+ print(result)
+
+7. LOOKUP + EXECUTION GUARDRAILS (GENERIC):
+ A) LOOKUP FALLBACK (max 2 tries):
+ - Try one primary lookup strategy.
+ - If it fails, try one alternative strategy.
+ - If both fail, DO NOT guess values and DO NOT run write actions.
+ Ask the user for the missing value(s) in a normal assistant message.
+
+ B) NEVER USE PLACEHOLDER LITERALS AS REAL VALUES:
+ Parameter names are not runtime values.
+ Forbidden as actual values: "user_id", "order_id", "item_id", "product_id",
+ "payment_method_id", "email", "first_name", "last_name", "zip", etc.
+
+ C) READ-THEN-WRITE DISCIPLINE:
+ Before any write action, explicitly read current state and confirm:
+ - target entity exists
+ - operation is allowed in current state
+ - selected replacement/option/value is valid in current data
+
+ D) ARGUMENT SANITY CHECKS BEFORE WRITE:
+ For each required argument:
+ - it is defined in this script
+ - it is non-empty and concrete (not placeholder text)
+ - it was derived from conversation/tool output
+ Print chosen argument values before the write call.
+
+ E) TWO-PHASE SCRIPT TEMPLATE:
+ - Phase 1: gather and print all required facts
+ - Phase 2: execute write action only if checks pass
+
+8. MANDATORY PRE-EXECUTION SELF-CHECK (RUN THIS MENTALLY BEFORE EVERY execute_python CALL):
+ - No `input()`, `input`, `Input`, or `user_input` anywhere in code.
+ - No placeholder literals used as real values (`"user_id"`, `"order_id"`, etc.).
+ - Every variable used in arguments is defined in THIS script.
+ - For every critical value, print it before write actions.
+ - If a lookup failed twice, DO NOT continue with write actions; ask user for corrected value.
+
+9. STRICT LOOKUP FAILURE RULE:
+ If authentication/lookup fails (target entity not found), you MUST stop mutation flow.
+ Do not call write actions after unresolved lookup failure.
+ Your next action is either:
+ - one alternative lookup attempt, OR
+ - ask the user for corrected identifying information.
+
+SILENT ERROR RECOVERY (MANDATORY):
+When execute_python returns an error, you MUST:
+1. NOT produce any text message — no apology, no explanation, no "let me try again"
+2. Immediately make another execute_python call with corrected code
+3. The user must NEVER know an error occurred
+
+This means your very next turn after an error is ONLY a tool call — zero words to the user.
+
+❌ WRONG: "I apologize for the error. Let me try a different approach." + execute_python
+❌ WRONG: "Let me fix that." + execute_python
+❌ WRONG: Any text at all before or alongside the retry
+✅ CORRECT: execute_python (with fixed code, no accompanying text)
+
+Only speak to the user when you have a successful result or need information from them.
+If after 3 retries you still cannot complete the task, you may say there is a temporary technical issue — but never mention implementation details.
+
+ANTI-LOOP GUARD (GENERIC):
+- If you hit the same error class twice in a row (e.g., repeated "not found" or repeated type/index error),
+ stop retrying variations and switch strategy:
+ 1) one alternative lookup/validation path, then
+ 2) ask user for the missing/corrected value if still unresolved.
+- Do not burn steps by repeating near-identical failing scripts.
+
+============================================================
+SECTION 2 — USE-CASE SPECIFIC EXAMPLES (INCIDENT TRIAGE)
+============================================================
+
+TRIAGE WORKFLOW (MANDATORY ORDER):
+1) Collect signals (HTTP, DNS, TCP/TLS, public status)
+2) Summarize severity + likely causes
+3) Produce a structured incident report
+4) Draft a customer-facing update
+
+CRITICAL — WRITE COMPLETE SCRIPTS EXAMPLE:
+
+❌ WRONG (splitting checks into many separate turns):
+ Turn 1: execute_python → actions.check_http_endpoint(...)
+ Turn 2: execute_python → actions.resolve_hostname(...)
+ Turn 3: execute_python → actions.check_tcp_port(...)
+
+✅ CORRECT (single complete triage script):
+ execute_python →
+ import json
+ http_raw = actions.check_http_endpoint("https://api.github.com")
+ dns_raw = actions.resolve_hostname("api.github.com")
+ tcp_raw = actions.check_tcp_port("api.github.com", 443)
+ status_raw = actions.get_public_status("github")
+
+ summary_raw = actions.summarize_incident_signals(
+ service_name="github-api",
+ http_result_json=http_raw,
+ tcp_result_json=tcp_raw,
+ public_status_json=status_raw,
+ error_text="elevated 5xx observed"
+ )
+
+ print(http_raw)
+ print(dns_raw)
+ print(tcp_raw)
+ print(status_raw)
+ print(summary_raw)
+
+TRIAGE FACTUALITY RULE:
+- Never claim outage/recovery without supporting tool evidence.
+- Every severity/cause claim must reference current tool outputs from this run.
+- If signals conflict, explicitly say "inconclusive" and gather one additional signal.
+
+SEVERITY DISCIPLINE:
+- Use low/medium/high/critical only.
+- If network path and HTTP both fail, treat as at least high.
+- If public status indicates major outage, escalate severity accordingly.
+
+TRANSFER TO HUMAN AGENT:
+Transfer only when the request is outside available capabilities or urgent human coordination is required.
+To transfer, make an execute_python call with code: actions.transfer_to_human_agents("summary"), then send the message 'YOU ARE BEING TRANSFERRED TO A HUMAN AGENT. PLEASE HOLD ON.' to the user.
+
+
+# Incident triage agent policy
+
+As an incident triage agent, you can help users:
+
+- check endpoint health and latency
+- resolve DNS and test TCP connectivity
+- inspect TLS certificate metadata
+- review public vendor status
+- summarize incident severity/causes
+- generate structured incident reports and customer updates
+
+You must gather evidence before conclusions.
+Do not speculate or fabricate status details, timelines, or root causes.
+
+You should provide factual, concise operational updates and actionable next steps.
+
+For critical incidents, prioritize:
+1) impact statement,
+2) current status,
+3) immediate mitigation actions,
+4) next update ETA.
+
+Do not claim issue resolution unless verified by fresh checks.
+
+You should make at most one tool call at a time, and if you make a tool call, you should not respond to the user at the same time. If you respond to the user, you should not make a tool call at the same time.
+
+You should deny requests that violate policy or require unavailable capabilities.
+
+Transfer to a human agent if and only if the request cannot be handled with available actions. To transfer, first call transfer_to_human_agents, then send: 'YOU ARE BEING TRANSFERRED TO A HUMAN AGENT. PLEASE HOLD ON.'
+
+## Domain basics
+
+- Endpoint health is assessed from HTTP status, latency, and error details.
+- Network reachability is assessed from DNS resolution and TCP connectivity.
+- TLS inspection provides certificate metadata only, not full PKI diagnostics.
+- Public status APIs are informative but may lag real-time service conditions.
+- Incident severity should be revised as new evidence arrives.
+
diff --git a/sample_solutions/AgenticCodeExecution/requirements.txt b/sample_solutions/AgenticCodeExecution/requirements.txt
new file mode 100644
index 00000000..bc815ba7
--- /dev/null
+++ b/sample_solutions/AgenticCodeExecution/requirements.txt
@@ -0,0 +1,31 @@
+# MCP Agent Servers - Core Dependencies
+# Install with: pip install -r requirements.txt
+
+# MCP Framework
+fastmcp>=2.14.0
+mcp>=1.24.0
+
+# Data Models & Validation
+pydantic>=2.12.0
+pydantic-settings>=2.12.0
+
+# HTTP/Async
+httpx>=0.28.0
+httpx-sse>=0.4.0
+uvicorn>=0.38.0
+anyio>=4.12.0
+
+# Utilities
+python-dotenv>=1.2.0
+
+# Code Execution Sandbox (RestrictedPython)
+RestrictedPython>=8.0.0
+
+# UTCP & Code-Mode with native MCP support
+# Install from local/git:
+# pip install -e /path/to/code-mode
+# OR
+# pip install git+https://github.com/intel-sandbox/code-mode.git
+utcp>=1.1.0
+utcp-mcp>=1.1.0
+mcp-use>=1.3.0
diff --git a/sample_solutions/AgenticCodeExecution/sandbox-server/Dockerfile b/sample_solutions/AgenticCodeExecution/sandbox-server/Dockerfile
new file mode 100644
index 00000000..614dbdc1
--- /dev/null
+++ b/sample_solutions/AgenticCodeExecution/sandbox-server/Dockerfile
@@ -0,0 +1,26 @@
+ARG PYTHON_BASE_IMAGE=public.ecr.aws/docker/library/python:3.12-slim
+FROM ${PYTHON_BASE_IMAGE}
+
+ARG CODE_MODE_PIP_SPEC=git+https://github.com/universal-tool-calling-protocol/code-mode.git#subdirectory=python-library
+
+RUN apt-get update \
+ && apt-get install -y --no-install-recommends git ca-certificates \
+ && rm -rf /var/lib/apt/lists/*
+
+ENV PYTHONDONTWRITEBYTECODE=1 \
+ PYTHONUNBUFFERED=1
+
+WORKDIR /app
+
+COPY requirements.txt /app/requirements.txt
+COPY sandbox-server/requirements.txt /app/sandbox-server-requirements.txt
+RUN pip install --no-cache-dir -r /app/requirements.txt \
+ && pip install --no-cache-dir -r /app/sandbox-server-requirements.txt \
+ && pip install --no-cache-dir "${CODE_MODE_PIP_SPEC}"
+
+COPY sandbox-server /app/sandbox-server
+WORKDIR /app/sandbox-server
+
+EXPOSE 5051
+
+CMD ["python", "mcp_server_codemode.py", "--host", "0.0.0.0", "--port", "5051", "--tools-url", "http://tools-server:5050/sse"]
diff --git a/sample_solutions/AgenticCodeExecution/sandbox-server/mcp_server_codemode.py b/sample_solutions/AgenticCodeExecution/sandbox-server/mcp_server_codemode.py
new file mode 100644
index 00000000..50b91cee
--- /dev/null
+++ b/sample_solutions/AgenticCodeExecution/sandbox-server/mcp_server_codemode.py
@@ -0,0 +1,1539 @@
+#!/usr/bin/env python3
+"""
+MCP Sandbox Server - Multi-Engine Implementation
+
+Supports both code-mode (utcp_code_mode) and Monty (pydantic-monty)
+for secure Python code execution.
+Connects to an MCP tools server and exposes execute_python via MCP.
+
+Features (matching code_tools.py behaviour):
+- Dynamic tool description auto-generated from the tools server's list_tools
+- Comprehensive _analyze_error_message hints
+- Session-aware tool calls (mcp-session-id header)
+- Self-sufficient: parses descriptions, params, return types from standard MCP tool listings
+
+Usage:
+ python mcp_server_codemode.py --port 5051 --tools-url http://localhost:5050/sse --engine codemode
+ python mcp_server_codemode.py --port 5051 --tools-url http://localhost:5050/sse --engine monty
+"""
+
+import argparse
+import asyncio
+import json
+import logging
+import re
+import threading
+import time
+import warnings
+
+# Suppress RestrictedPython's benign warning about the 'printed' variable.
+# We read print output via __shared_print_collector__, not the 'printed' local.
+warnings.filterwarnings(
+ "ignore",
+ message=r".*Prints, but never reads 'printed' variable.*",
+ category=SyntaxWarning,
+ module=r"RestrictedPython\.compile",
+)
+from dataclasses import dataclass, field
+from typing import Annotated, Any, Callable, Dict, List, Mapping, Optional, Tuple
+from uuid import uuid4
+from pydantic import Field
+
+from fastmcp import FastMCP, Context
+from mcp import ClientSession
+from mcp.client.sse import sse_client
+
+try:
+ from utcp_code_mode import CodeModeUtcpClient
+ _CODEMODE_IMPORT_ERROR: Optional[Exception] = None
+except Exception as _codemode_exc:
+ CodeModeUtcpClient = object # type: ignore[assignment]
+ _CODEMODE_IMPORT_ERROR = _codemode_exc
+
+try:
+ from utcp.data.tool import Tool, JsonSchema
+ from utcp.data.call_template import CallTemplate
+ _UTCP_IMPORT_ERROR: Optional[Exception] = None
+except Exception as _utcp_exc:
+ class Tool: # type: ignore[no-redef]
+ def __init__(self, *args, **kwargs):
+ pass
+
+ class JsonSchema: # type: ignore[no-redef]
+ def __init__(self, *args, **kwargs):
+ pass
+
+ class CallTemplate: # type: ignore[no-redef]
+ def __init__(self, *args, **kwargs):
+ pass
+
+ _UTCP_IMPORT_ERROR = _utcp_exc
+
+try:
+ import pydantic_monty
+ _MONTY_IMPORT_ERROR: Optional[Exception] = None
+except Exception as _monty_exc:
+ pydantic_monty = None # type: ignore[assignment]
+ _MONTY_IMPORT_ERROR = _monty_exc
+
+# Setup logging
+logging.basicConfig(
+ level=logging.INFO,
+ format='%(asctime)s [SANDBOX] %(message)s',
+ datefmt='%H:%M:%S'
+)
+logger = logging.getLogger(__name__)
+
+
+# ==================== ERROR ANALYSIS (delegated to tools server) ====================
+
+
+async def _fetch_remote_error_hint(
+ tools_url: str,
+ session_id: str,
+ error_msg: str,
+ code: str,
+) -> str:
+ """Fetch error hint from tools server if it exposes get_execution_error_hint."""
+ headers = {}
+ if session_id:
+ headers["mcp-session-id"] = session_id
+
+ async with sse_client(tools_url, headers=headers) as (read, write):
+ async with ClientSession(read, write) as session:
+ await session.initialize()
+ result = await session.call_tool(
+ "get_execution_error_hint",
+ {
+ "error_msg": error_msg,
+ "code": code,
+ "session_id": session_id,
+ },
+ )
+ if getattr(result, "isError", False):
+ return ""
+
+ if hasattr(result, "content") and result.content:
+ parts = [item.text for item in result.content if hasattr(item, "text")]
+ return "\n".join(parts).strip()
+ return ""
+
+
+async def _fetch_remote_tool_metadata(tools_url: str, session_id: str = "") -> Dict[str, Any]:
+ """Fetch metadata payload from tools server MCP get_tool_metadata tool."""
+ headers = {}
+ if session_id:
+ headers["mcp-session-id"] = session_id
+
+ try:
+ async with sse_client(tools_url, headers=headers) as (read, write):
+ async with ClientSession(read, write) as session:
+ await session.initialize()
+ result = await session.call_tool(
+ "get_tool_metadata",
+ {"session_id": session_id},
+ )
+ if getattr(result, "isError", False):
+ return {}
+
+ if hasattr(result, "content") and result.content:
+ parts = [item.text for item in result.content if hasattr(item, "text")]
+ raw = "\n".join(parts).strip()
+ if raw:
+ parsed = json.loads(raw)
+ if isinstance(parsed, dict):
+ return parsed
+ except Exception as e:
+ logger.warning(f"Could not fetch tool metadata from {tools_url}: {e}")
+
+ return {}
+
+
+def _analyze_error_message_with_tools(error_msg: str, code: str = "", session_id: str = "") -> str:
+ """Resolve error hint from tools server only (no local fallback)."""
+ if not error_msg:
+ return ""
+
+ effective_session_id = session_id or _session_id
+
+ if _tools_url:
+ try:
+ asyncio.get_running_loop()
+ loop_running = True
+ except RuntimeError:
+ loop_running = False
+
+ if not loop_running:
+ try:
+ remote_hint = asyncio.run(
+ _fetch_remote_error_hint(
+ tools_url=_tools_url,
+ session_id=effective_session_id,
+ error_msg=error_msg,
+ code=code,
+ )
+ )
+ if remote_hint:
+ return remote_hint
+ except Exception:
+ pass
+ else:
+ # If already inside an event loop, run in a short-lived thread.
+ result_holder = {"hint": ""}
+
+ def _runner():
+ try:
+ result_holder["hint"] = asyncio.run(
+ _fetch_remote_error_hint(
+ tools_url=_tools_url,
+ session_id=effective_session_id,
+ error_msg=error_msg,
+ code=code,
+ )
+ )
+ except Exception:
+ result_holder["hint"] = ""
+
+ thread = threading.Thread(target=_runner, daemon=True)
+ thread.start()
+ thread.join(timeout=5)
+ if result_holder["hint"]:
+ return result_holder["hint"]
+
+ return ""
+
+
+# ==================== MCP TOOLS CLIENT ====================
+
+@dataclass
+class SandboxResult:
+ """Result from sandbox code execution."""
+ success: bool
+ result: Any = None
+ output: str = ""
+ logs: List[str] = field(default_factory=list)
+ error: Optional[str] = None
+
+
+def _unwrap_exception(exc: BaseException) -> str:
+ """Unwrap ExceptionGroup/TaskGroup to extract the root cause message."""
+ # Handle ExceptionGroup (Python 3.11+) / TaskGroup errors
+ if hasattr(exc, 'exceptions'):
+ # ExceptionGroup: recurse into sub-exceptions
+ messages = []
+ for sub_exc in exc.exceptions:
+ messages.append(_unwrap_exception(sub_exc))
+ return "; ".join(messages)
+ # Handle chained exceptions
+ if exc.__cause__:
+ return _unwrap_exception(exc.__cause__)
+ return str(exc)
+
+
+class McpToolsClient(CodeModeUtcpClient):
+ """CodeModeUtcpClient that proxies tool calls to an MCP server.
+
+ This allows using the code-mode library's secure Python execution
+ while calling tools on a remote MCP server.
+
+ Session-aware: passes mcp-session-id header on every SSE connection
+ so the tools server can isolate DB state per task.
+ """
+
+ def __init__(self, tools_url: str, tools: List[Tool] = None, session_id: str = ""):
+ """Initialize with MCP server URL and discovered tools.
+
+ Args:
+ tools_url: SSE URL of the MCP tools server
+ tools: Pre-discovered list of Tool objects
+ session_id: Session ID to pass as mcp-session-id header
+ """
+ self._tools_url = tools_url
+ self._tool_objects = tools or []
+ self._tool_function_cache: Dict[str, str] = {}
+ self._session_id = session_id
+
+ logger.info(f"McpToolsClient initialized with {len(self._tool_objects)} tools, session={session_id[:8] if session_id else 'none'}...")
+
+ async def call_tool(self, tool_name: str, tool_args: Dict[str, Any] = None) -> Any:
+ """Call a tool on the MCP server."""
+ if tool_args is None:
+ tool_args = {}
+
+ # Strip "actions." prefix if present (code-mode adds this)
+ mcp_tool_name = tool_name
+ if tool_name.startswith("actions."):
+ mcp_tool_name = tool_name[8:] # Remove "actions." prefix
+
+ logger.info(f"Calling MCP tool: {mcp_tool_name} (from {tool_name})")
+
+ # Inject session_id for per-session DB isolation on the tools server
+ if self._session_id:
+ tool_args["session_id"] = self._session_id
+
+ try:
+ headers = {}
+ if self._session_id:
+ headers["mcp-session-id"] = self._session_id
+
+ async with sse_client(self._tools_url, headers=headers) as (read, write):
+ async with ClientSession(read, write) as session:
+ await session.initialize()
+
+ # Call the tool
+ result = await session.call_tool(mcp_tool_name, tool_args)
+
+ # Extract text content from result
+ text_content = ""
+ if hasattr(result, 'content') and result.content:
+ contents = []
+ for item in result.content:
+ if hasattr(item, 'text'):
+ contents.append(item.text)
+ text_content = "\n".join(contents) if contents else str(result)
+ else:
+ text_content = str(result)
+
+ # Check if the MCP tool reported an error
+ if getattr(result, 'isError', False):
+ raise RuntimeError(text_content or "Tool returned an error")
+
+ return text_content
+
+ except RuntimeError:
+ # Re-raise RuntimeError (from isError check above or tool_function wrapper)
+ raise
+ except BaseException as e:
+ # Unwrap ExceptionGroup / TaskGroup to get the real error message
+ root_cause = _unwrap_exception(e)
+ logger.warning(f"MCP tool call failed: {mcp_tool_name}: {root_cause}")
+ raise RuntimeError(f"{root_cause}")
+
+ async def get_tools(self) -> List[Tool]:
+ """Get all registered tools."""
+ return self._tool_objects
+
+ def call_tool_sync(self, tool_name: str, tool_args: Dict[str, Any] = None) -> Any:
+ """Call a tool synchronously, safe even if an event loop is already running."""
+ if tool_args is None:
+ tool_args = {}
+
+ try:
+ asyncio.get_running_loop()
+ loop_running = True
+ except RuntimeError:
+ loop_running = False
+
+ if not loop_running:
+ return asyncio.run(self.call_tool(tool_name, tool_args))
+
+ result_holder: Dict[str, Any] = {"result": None, "error": None}
+
+ def _runner() -> None:
+ try:
+ result_holder["result"] = asyncio.run(self.call_tool(tool_name, tool_args))
+ except BaseException as exc:
+ result_holder["error"] = exc
+
+ thread = threading.Thread(target=_runner, daemon=True)
+ thread.start()
+ thread.join(timeout=30)
+
+ if thread.is_alive():
+ raise RuntimeError(f"Timed out calling MCP tool '{tool_name}'")
+
+ if result_holder["error"] is not None:
+ error = result_holder["error"]
+ if isinstance(error, RuntimeError):
+ raise error
+ raise RuntimeError(_unwrap_exception(error))
+
+ return result_holder["result"]
+
+ async def register_manual(self, *args, **kwargs):
+ pass
+
+ async def register_manuals(self, *args, **kwargs):
+ pass
+
+ async def deregister_manual(self, *args, **kwargs):
+ pass
+
+ async def search_tools(self, query: str, limit: int = 10, **kwargs):
+ """Simple search through tools."""
+ query_lower = query.lower()
+ results = []
+ for tool in self._tool_objects:
+ if query_lower in tool.name.lower() or query_lower in tool.description.lower():
+ results.append(tool)
+ if len(results) >= limit:
+ break
+ return results
+
+ async def _create_execution_context(self, tools: List[Tool], logs: Optional[List[str]] = None) -> Dict[str, Any]:
+ """Create execution context with RestrictedPython guards.
+
+ Override parent to:
+ 1. Add missing _getiter_, _getitem_ guards for RestrictedPython
+ 2. Create tool functions that handle positional arguments
+ """
+ from RestrictedPython import safe_globals
+ from RestrictedPython.Eval import default_guarded_getiter, default_guarded_getitem
+ from RestrictedPython.Guards import guarded_iter_unpack_sequence, full_write_guard
+ from RestrictedPython.PrintCollector import PrintCollector
+ import concurrent.futures
+
+ # Start with RestrictedPython's safe globals
+ context: Dict[str, Any] = safe_globals.copy()
+
+ # Add RestrictedPython guards for iteration, item access, and writes
+ context['_getiter_'] = default_guarded_getiter
+ context['_getitem_'] = default_guarded_getitem
+ context['_iter_unpack_sequence_'] = guarded_iter_unpack_sequence
+ context['_getattr_'] = getattr
+ context['_write_'] = full_write_guard # Needed for list.append(), +=, etc.
+
+ # _inplacevar_ is needed for augmented assignments (+=, -=, *=, etc.)
+ # RestrictedPython passes the OPERATOR STRING (e.g. '+='), not a callable.
+ def _inplacevar_(op, x, y):
+ if op == '+=':
+ return x + y
+ elif op == '-=':
+ return x - y
+ elif op == '*=':
+ return x * y
+ elif op == '/=':
+ return x / y
+ elif op == '//=':
+ return x // y
+ elif op == '%=':
+ return x % y
+ elif op == '**=':
+ return x ** y
+ elif op == '<<=':
+ return x << y
+ elif op == '>>=':
+ return x >> y
+ elif op == '&=':
+ return x & y
+ elif op == '^=':
+ return x ^ y
+ elif op == '|=':
+ return x | y
+ # Fallback: try calling op in case a future RestrictedPython passes a callable
+ elif callable(op):
+ return op(x, y)
+ else:
+ raise ValueError(f'Unknown in-place operator: {op}')
+ context['_inplacevar_'] = _inplacevar_
+
+ # Create restricted import
+ SAFE_MODULES = {'json', 'math', 'datetime', 'time', 're', 'typing', 'collections', 'itertools'}
+
+ def restricted_import(name, *args, **kwargs):
+ if name in SAFE_MODULES:
+ return __import__(name, *args, **kwargs)
+ raise ImportError(f"Import of '{name}' is not allowed")
+
+ # Add builtins
+ if '__builtins__' in context and isinstance(context['__builtins__'], dict):
+ context['__builtins__'].update({
+ '__import__': restricted_import,
+ 'list': list, 'dict': dict, 'set': set, 'tuple': tuple,
+ 'str': str, 'int': int, 'float': float, 'bool': bool,
+ 'len': len, 'range': range, 'isinstance': isinstance,
+ 'hasattr': hasattr, 'getattr': getattr, 'type': type,
+ 'max': max, 'min': min, 'sum': sum, 'abs': abs, 'round': round,
+ 'sorted': sorted, 'reversed': reversed, 'enumerate': enumerate,
+ 'zip': zip, 'filter': filter, 'map': map, 'next': next,
+ })
+
+ # Add safe modules
+ context.update({
+ 'json': __import__('json'),
+ 'math': __import__('math'),
+ 'datetime': __import__('datetime'),
+ 'time': __import__('time'),
+ 're': __import__('re'),
+ '__import__': restricted_import,
+ })
+
+ # Set up print collector
+ shared_print_collector = PrintCollector()
+
+ def print_factory(_getattr=None):
+ return shared_print_collector
+
+ context['_print_'] = print_factory
+ context['_print'] = shared_print_collector
+ context['__shared_print_collector__'] = shared_print_collector
+
+ # Create tool functions that handle BOTH positional and keyword arguments
+ def make_tool_function(tool_obj: Tool):
+ """Create a tool function that handles positional args."""
+ tool_name_ref = tool_obj.name
+
+ # Get parameter names from the tool schema
+ param_names = []
+ if hasattr(tool_obj, 'inputs') and tool_obj.inputs:
+ props = getattr(tool_obj.inputs, 'properties', {}) or {}
+ param_names = list(props.keys())
+
+ def tool_function(*args, **kwargs):
+ # Convert positional args to kwargs using param names
+ call_kwargs = dict(kwargs)
+ for i, arg in enumerate(args):
+ if i < len(param_names):
+ call_kwargs[param_names[i]] = arg
+ else:
+ # Fallback for extra positional args
+ call_kwargs[f'arg{i}'] = arg
+
+ try:
+ logger.info(f"Tool call: {tool_name_ref} with args: {list(call_kwargs.keys())}")
+
+ # Run the async tool call in a thread to avoid event loop issues
+ import concurrent.futures
+ with concurrent.futures.ThreadPoolExecutor() as executor:
+ future = executor.submit(asyncio.run, self.call_tool(tool_name_ref, call_kwargs))
+ result = future.result()
+
+ logger.info(f"Tool call {tool_name_ref} completed")
+ return result
+ except RuntimeError:
+ # Already a clean error from call_tool — pass through as-is
+ raise
+ except BaseException as error:
+ root_cause = _unwrap_exception(error)
+ logger.warning(f"Tool call {tool_name_ref} failed: {root_cause}")
+ raise RuntimeError(f"Error calling tool '{tool_name_ref}': {root_cause}")
+
+ return tool_function
+
+ # Add tool functions organized by manual name
+ for tool in tools:
+ if '.' in tool.name:
+ manual_name, *tool_parts = tool.name.split('.')
+ tool_name = '_'.join(tool_parts)
+
+ # Create manual namespace object if it doesn't exist
+ if manual_name not in context:
+ context[manual_name] = type('Manual', (), {})()
+
+ # Add the tool function to the manual namespace
+ setattr(context[manual_name], tool_name, make_tool_function(tool))
+ else:
+ context[tool.name] = make_tool_function(tool)
+
+ return context
+
+
+# ==================== TOOL DISCOVERY ====================
+
+async def discover_mcp_tools(tools_url: str) -> List[Tool]:
+ """Discover tools from an MCP server and convert to UTCP Tool objects."""
+ if _UTCP_IMPORT_ERROR is not None:
+ logger.warning("UTCP packages unavailable; skipping codemode tool discovery: %s", _UTCP_IMPORT_ERROR)
+ return []
+
+ tools = []
+ internal_tools = {
+ "get_data_model_schema",
+ "get_tool_metadata",
+ "reset_db_session",
+ "list_active_sessions",
+ "get_execution_error_hint",
+ }
+
+ try:
+ async with sse_client(tools_url) as (read, write):
+ async with ClientSession(read, write) as session:
+ await session.initialize()
+
+ result = await session.list_tools()
+
+ for mcp_tool in result.tools:
+ if mcp_tool.name in internal_tools:
+ continue
+ # Add "actions." prefix for code-mode namespace
+ tool_name = f"actions.{mcp_tool.name}"
+
+ # Parse input schema
+ input_schema = mcp_tool.inputSchema if hasattr(mcp_tool, 'inputSchema') else {}
+ properties = {}
+ if isinstance(input_schema, dict) and 'properties' in input_schema:
+ properties = {k: v for k, v in input_schema['properties'].items()
+ if k != 'session_id'} # Hide internal param
+
+ tool = Tool(
+ name=tool_name,
+ description=mcp_tool.description or f"MCP tool: {mcp_tool.name}",
+ inputs=JsonSchema(type="object", properties=properties),
+ outputs=JsonSchema(type="object"),
+ tags=["mcp"],
+ tool_call_template=CallTemplate(name="mcp", call_template_type="mcp"),
+ )
+ tools.append(tool)
+
+ logger.info(f"Discovered {len(tools)} MCP tools")
+ return tools
+
+ except Exception as e:
+ logger.warning(f"Could not discover tools from {tools_url}: {e}")
+ return []
+
+
+async def discover_tools(tools_url: str) -> Dict[str, dict]:
+ """Discover available tools from the MCP tools server (flat dict for description generation)."""
+ try:
+ async with sse_client(tools_url) as (read, write):
+ async with ClientSession(read, write) as session:
+ await session.initialize()
+
+ result = await session.list_tools()
+ tools = {}
+ for tool in result.tools:
+ params = tool.inputSchema if hasattr(tool, 'inputSchema') else {}
+ # Filter out internal session_id parameter
+ if isinstance(params, dict) and 'properties' in params:
+ filtered_props = {k: v for k, v in params['properties'].items()
+ if k != 'session_id'}
+ params = dict(params)
+ params['properties'] = filtered_props
+ tools[tool.name] = {
+ "description": tool.description or "",
+ "parameters": params,
+ }
+ return tools
+ except Exception as e:
+ logger.warning(f"Could not discover tools from {tools_url}: {e}")
+ return {}
+
+
+def _parse_metadata_from_tools(tools_map: Dict[str, dict]) -> dict:
+ """Extract metadata (short descriptions, param descriptions, return types)
+ directly from the MCP tool listings without requiring a special metadata tool.
+
+ Parses the tool description (docstring) and inputSchema properties.
+
+ Returns dict with keys:
+ - return_types: {tool_name: return_type_str}
+ - short_descriptions: {tool_name: first_sentence}
+ - param_descriptions: {tool_name: {param_name: description}}
+ """
+ return_types: Dict[str, str] = {}
+ short_descriptions: Dict[str, str] = {}
+ param_descriptions: Dict[str, Dict[str, str]] = {}
+
+ for name, info in tools_map.items():
+ desc = info.get("description", "")
+
+ # --- Short description: full first line (used in signature) ---
+ first_line = desc.split('\n')[0].strip() if desc else ""
+ short_desc = first_line.rstrip('.') if first_line else name
+ short_descriptions[name] = short_desc
+
+ # --- Return type: all MCP tools return JSON strings, but parse Returns: section ---
+ ret_type = "str"
+ if '\nReturns:' in desc or '\n Returns:' in desc:
+ # Try to detect if it mentions JSON / dict-like returns
+ returns_section = desc.split('Returns:')[-1].strip().split('\n')[0].strip()
+ if 'json' in returns_section.lower() or 'dict' in returns_section.lower():
+ ret_type = "str (JSON)"
+ elif returns_section:
+ ret_type = "str"
+ return_types[name] = ret_type
+
+ # --- Param descriptions: from inputSchema properties ---
+ schema = info.get("parameters", {})
+ properties = schema.get("properties", {})
+ tool_params = {}
+ for param_name, param_info in properties.items():
+ p_desc = param_info.get("description", "")
+ if p_desc:
+ tool_params[param_name] = p_desc.strip().rstrip('.')
+ if tool_params:
+ param_descriptions[name] = tool_params
+
+ return {
+ "return_types": return_types,
+ "short_descriptions": short_descriptions,
+ "param_descriptions": param_descriptions,
+ }
+
+
+# ==================== DYNAMIC DESCRIPTION GENERATION ====================
+
+def _generate_dynamic_description(
+ tools_map: Dict[str, dict],
+    tool_metadata: Optional[Dict[str, Any]] = None,
+) -> str:
+ """Auto-generate a description of all available actions.
+
+ Produces output matching code_tools.py's _generate_actions_description():
+ - Signature line: `- actions.name(params) -> ReturnType: First sentence of description`
+ - Parameter descriptions indented below
+ - DATA MODEL REFERENCE section with compact type definitions
+
+ Works self-sufficiently from MCP list_tools data. If tool_metadata is provided
+ (e.g. from a get_tool_metadata tool), it will be used as an override.
+ """
+ # Parse metadata directly from tools_map (always available)
+ parsed = _parse_metadata_from_tools(tools_map)
+
+ # Allow external tool_metadata to override parsed values
+ if tool_metadata:
+ for key in ("return_types", "short_descriptions", "param_descriptions"):
+ if key in tool_metadata and tool_metadata[key]:
+ parsed[key].update(tool_metadata[key])
+
+ return_types = parsed["return_types"]
+ short_descriptions = parsed["short_descriptions"]
+ param_descriptions = parsed["param_descriptions"]
+ semantic_types = (tool_metadata or {}).get("semantic_types", {})
+
+ # Filter out internal/meta tools that shouldn't be exposed to the agent
+ internal_tools = {
+ "get_data_model_schema",
+ "get_tool_metadata",
+ "reset_db_session",
+ "list_active_sessions",
+ "get_execution_error_hint",
+ }
+
+ lines = [
+ "API REFERENCE - DO NOT CALL THESE DIRECTLY AS TOOLS.",
+ "YOU MUST USE `execute_python` TO CALL THESE FUNCTIONS (e.g. `actions.find_user...`).",
+ "",
+ "AVAILABLE ACTIONS (call these via actions.method_name(...) in your Python code):",
+ ""
+ ]
+
+ ordered_actions = (tool_metadata or {}).get("ordered_actions", [])
+ iteration_names: List[str]
+ if isinstance(ordered_actions, list) and ordered_actions:
+ seen = set()
+ iteration_names = []
+ for action_name in ordered_actions:
+ if action_name in tools_map and action_name not in seen:
+ iteration_names.append(action_name)
+ seen.add(action_name)
+ for action_name in tools_map.keys():
+ if action_name not in seen:
+ iteration_names.append(action_name)
+ else:
+ iteration_names = list(tools_map.keys())
+
+ for name in iteration_names:
+ info = tools_map.get(name, {})
+ if name in internal_tools:
+ continue
+
+ # Get parameters from schema (just the names)
+ params = []
+ schema = info.get("parameters", {})
+ properties = schema.get("properties", {})
+ for param_name in properties:
+ if param_name == "session_id":
+ continue # Hide internal session parameter from LLM
+ params.append(param_name)
+ params_str = ', '.join(params)
+
+ # Get return type
+ ret_type = return_types.get(name, "str")
+ semantic_type = semantic_types.get(name, "")
+
+ # Get short description (for the signature line)
+ short_desc = short_descriptions.get(name, "")
+
+ # Format: - actions.name(params) -> Type: First sentence
+ if semantic_type:
+ lines.append(
+ f"- actions.{name}({params_str}) -> {ret_type} (semantic: {semantic_type}): {short_desc}"
+ )
+ else:
+ lines.append(f"- actions.{name}({params_str}) -> {ret_type}: {short_desc}")
+
+ # Append parameter descriptions if available (POC format)
+ for param_name, p_desc in param_descriptions.get(name, {}).items():
+ lines.append(f" {param_name}: {p_desc}")
+
+ # Include the FULL description body (everything after the first line)
+ # This preserves Args, Returns, Usage sections exactly as written
+ full_desc = info.get("description", "")
+ if full_desc:
+ desc_lines = full_desc.strip().splitlines()
+ remaining = desc_lines[1:] # Skip first line (already in signature)
+ if remaining:
+ # Find minimum indentation of non-empty lines to preserve structure
+ non_empty = [l for l in remaining if l.strip()]
+ if non_empty:
+ min_indent = min(len(l) - len(l.lstrip()) for l in non_empty)
+ for dline in remaining:
+ if dline.strip():
+ # Preserve relative indentation, add 4-space base indent
+ lines.append(f" {dline[min_indent:]}")
+ # Skip blank lines to keep output compact
+
+ # Add DATA MODEL REFERENCE section
+ # If tool_metadata provides data_model_defs, use them
+ data_model_defs = (tool_metadata or {}).get("data_model_defs", {})
+ if data_model_defs:
+ lines.append("")
+ lines.append("DATA MODEL REFERENCE:")
+
+ for type_name, type_schema in data_model_defs.items():
+ properties = type_schema.get('properties', {})
+
+ props = []
+ for prop_name, prop_info in properties.items():
+ prop_type = prop_info.get('type', '')
+ if '$ref' in prop_info:
+ prop_type = prop_info['$ref'].split('/')[-1]
+ elif 'anyOf' in prop_info:
+ types = [t.get('$ref', '').split('/')[-1] or t.get('type', '')
+ for t in prop_info['anyOf'] if t.get('type') != 'null']
+ prop_type = ' | '.join(filter(None, types))
+ elif prop_info.get('type') == 'array':
+ items = prop_info.get('items', {})
+ item_type = items.get('$ref', '').split('/')[-1] or items.get('type', 'Any')
+ prop_type = f"list[{item_type}]"
+ elif prop_info.get('type') == 'object':
+ prop_type = "dict"
+ props.append(f"{prop_name}: {prop_type}")
+
+ props_str = ', '.join(props)
+ lines.append(f"- {type_name}: {props_str}")
+
+ return '\n'.join(lines)
+
+
+def get_tools_documentation(available_tools: Dict[str, dict]) -> str:
+ """Generate documentation string for available tools."""
+ if not available_tools:
+ return "No tools discovered. Make sure tools server is running."
+
+ internal_tools = {
+ "get_data_model_schema",
+ "get_tool_metadata",
+ "reset_db_session",
+ "list_active_sessions",
+ "get_execution_error_hint",
+ }
+
+ lines = ["Available actions:"]
+ for name, info in sorted(available_tools.items()):
+ if name in internal_tools:
+ continue
+ desc = info.get("description", "")
+ if len(desc) > 100:
+ desc = desc[:100] + "..."
+ lines.append(f"- actions.{name}(...) - {desc}")
+
+ return "\n".join(lines)
+
+
+
+
+# ==================== SANDBOX ====================
+
+class Sandbox:
+ """High-level sandbox for secure Python code execution with MCP tools."""
+
+    def __init__(self, tools_url: str, tools: Optional[List[Tool]] = None, timeout: int = 60,
+ session_id: str = ""):
+ """Initialize the sandbox.
+
+ Args:
+ tools_url: SSE URL of the MCP tools server
+ tools: Pre-discovered list of Tool objects
+ timeout: Default execution timeout in seconds
+ session_id: Session ID for DB isolation
+ """
+ if _CODEMODE_IMPORT_ERROR is not None:
+ raise RuntimeError(
+ "codemode engine is unavailable because utcp_code_mode failed to import: "
+ f"{_CODEMODE_IMPORT_ERROR}"
+ )
+
+ self._tools_url = tools_url
+ self._tools = tools or []
+ self._client = McpToolsClient(tools_url, self._tools, session_id=session_id)
+ self._timeout = timeout
+ self._session_id = session_id
+
+ def execute(self, code: str, timeout: Optional[int] = None) -> SandboxResult:
+ """Execute Python code in the sandbox (synchronous)."""
+ start_time = time.time()
+ timeout = timeout or self._timeout
+
+ result_container = {"result": None, "error": None}
+
+ def run_async():
+ try:
+ result_container["result"] = asyncio.run(
+ self._client.call_tool_chain(code, timeout)
+ )
+ except Exception as e:
+ result_container["error"] = e
+
+ thread = threading.Thread(target=run_async)
+ thread.start()
+ thread.join(timeout=timeout + 5)
+
+ if result_container["error"]:
+ return SandboxResult(
+ success=False,
+ error=str(result_container["error"]),
+ logs=[f"[ERROR] {result_container['error']}"],
+ )
+
+ if result_container["result"] is None:
+ return SandboxResult(
+ success=False,
+ error=f"Execution timed out after {timeout} seconds",
+ logs=[f"[ERROR] Timeout after {timeout}s"],
+ )
+
+ cm_result = result_container["result"]
+ logs = cm_result.get("logs", [])
+ has_error = any("[ERROR]" in str(log) for log in logs)
+
+ return SandboxResult(
+ success=not has_error,
+ result=cm_result.get("result"),
+ output="\n".join(str(log) for log in logs),
+ logs=logs,
+ error=logs[-1] if has_error and logs else None,
+ )
+
+
+class MontySandbox:
+ """Sandbox engine backed by pydantic-monty with MCP tools as external functions."""
+
+ def __init__(self, tools_url: str, tools_map: Dict[str, dict], timeout: int = 60,
+ session_id: str = ""):
+ if pydantic_monty is None or _MONTY_IMPORT_ERROR is not None:
+ raise RuntimeError(
+ "monty engine is unavailable because pydantic_monty failed to import: "
+ f"{_MONTY_IMPORT_ERROR}"
+ )
+
+ self._tools_url = tools_url
+ self._tools_map = tools_map or {}
+ self._timeout = timeout
+ self._session_id = session_id
+ self._client = McpToolsClient(tools_url, [], session_id=session_id)
+
+ @staticmethod
+ def _rewrite_actions_calls(code: str) -> str:
+ """Adapt common code-mode patterns to Monty-compatible code."""
+ rewritten = code
+ rewritten = re.sub(r"\bactions\.([A-Za-z_]\w*)(\s*\()", r"\1\2", rewritten)
+
+ # Monty does not currently provide stdlib json import.
+ # Convert common patterns to host-provided helpers.
+ rewritten = re.sub(r"(?m)^\s*import\s+json\s*$", "", rewritten)
+ rewritten = rewritten.replace("json.loads(", "json_loads(")
+ rewritten = rewritten.replace("json.dumps(", "json_dumps(")
+
+ return rewritten
+
+ def _build_external_functions(self) -> Dict[str, Callable[..., Any]]:
+ external_functions: Dict[str, Callable[..., Any]] = {}
+
+ # Compatibility helpers for code that previously relied on `import json`.
+ external_functions["json_loads"] = json.loads
+ external_functions["json_dumps"] = json.dumps
+
+ for tool_name, info in self._tools_map.items():
+ schema = info.get("parameters", {}) if isinstance(info, dict) else {}
+ properties = schema.get("properties", {}) if isinstance(schema, dict) else {}
+ param_names = [name for name in properties.keys() if name != "session_id"]
+
+ def _make_tool_fn(name: str, ordered_params: List[str]) -> Callable[..., Any]:
+ def _tool_fn(*args, **kwargs):
+ call_kwargs = dict(kwargs)
+ for i, arg in enumerate(args):
+ if i < len(ordered_params):
+ call_kwargs[ordered_params[i]] = arg
+ else:
+ call_kwargs[f"arg{i}"] = arg
+
+ try:
+ return self._client.call_tool_sync(name, call_kwargs)
+ except RuntimeError:
+ raise
+ except BaseException as error:
+ root_cause = _unwrap_exception(error)
+ raise RuntimeError(f"Error calling tool '{name}': {root_cause}")
+
+ return _tool_fn
+
+ external_functions[tool_name] = _make_tool_fn(tool_name, param_names)
+
+ return external_functions
+
+ def execute(self, code: str, timeout: Optional[int] = None) -> SandboxResult:
+ timeout = timeout or self._timeout
+ adapted_code = self._rewrite_actions_calls(code)
+ logs: List[str] = []
+
+ def _print_cb(_stream: str, text: str) -> None:
+ logs.append(text)
+
+ limits = {"max_duration_secs": float(timeout)}
+
+ try:
+ runner = pydantic_monty.Monty(adapted_code, script_name="sandbox.py")
+ result = runner.run(
+ external_functions=self._build_external_functions(),
+ print_callback=_print_cb,
+ limits=limits,
+ )
+ return SandboxResult(
+ success=True,
+ result=result,
+ output="".join(logs).rstrip("\n"),
+ logs=logs,
+ error=None,
+ )
+ except Exception as error:
+ root_cause = _unwrap_exception(error)
+ return SandboxResult(
+ success=False,
+ result=None,
+ output="".join(logs).rstrip("\n"),
+ logs=logs,
+ error=root_cause,
+ )
+
+
+# ==================== BASE DESCRIPTION ====================
+
+EXECUTE_PYTHON_BASE_DESCRIPTION = """Executes Python code in a state-less sandbox environment.
+
+PRINCIPLES FOR SUCCESSFUL TOOL USE:
+
+1. STATELESS EXECUTION (CRITICAL):
+ This environment does NOT preserve variables between turns.
+ If you defined `my_var = "123"` in the previous turn, it is GONE.
+ You MUST redefine all variables or use string literals in every single block.
+
+2. GROUND TRUTH ONLY (NO GUESSING):
+ You must NEVER guess parameters like emails, IDs, or names.
+ Even if the pattern seems obvious, IT WILL BE WRONG.
+ ALWAYS ask the user for information if it is missing.
+
+3. PRINT EVERYTHING:
+ This tool only returns what you explicitly `print()`.
+ Assign results to variables and print them immediately.
+ Without prints retrieved information will not be visible to the model.
+
+4. INPUT BAN (HIGHEST PRIORITY):
+ - Never use `input()`.
+ - Never reference placeholder variables such as `input`, `Input`, `user_input`.
+ - If a value is missing, ask the user in a normal assistant message.
+
+5. LOOKUP FAILURE STOP RULE:
+ - If user/order/product/payment lookup fails, do NOT continue with write actions.
+ - Try one alternative lookup strategy.
+ - If still unresolved, ask the user for corrected identifying information.
+
+6. TYPE-SAFE ACCESS RULE:
+ - Before field access/iteration on important values, print type and value:
+ `print(type(x), x)`.
+ - If value is a JSON string, parse once with `json.loads`.
+ - If value is already dict/list/object, do NOT parse again.
+
+EXAMPLE:
+```python
+import json
+result = actions.some_method("arg")
+print(result)
+```
+
+SAFE EXECUTION TEMPLATE:
+```python
+import json
+raw = actions.some_method("arg")
+data = json.loads(raw) if isinstance(raw, str) and raw[:1] in '{[' else raw
+print(type(data), data)
+```
+
+IMPORTANT: Read each tool's description carefully to understand its return type.
+Some tools return JSON strings that must be parsed with `json.loads()` before accessing fields.
+
+API REFERENCE:
+The `actions` object is pre-loaded with these methods:
+ """
+
+EXECUTE_PYTHON_BASE_DESCRIPTION_MONTY = """Executes Python code in a state-less sandbox environment.
+
+PRINCIPLES FOR SUCCESSFUL TOOL USE:
+
+1. STATELESS EXECUTION (CRITICAL):
+ This environment does NOT preserve variables between turns.
+ You MUST redefine all variables or use string literals in every block.
+
+2. GROUND TRUTH ONLY (NO GUESSING):
+ NEVER guess IDs, emails, names, or account values.
+
+3. PRINT EVERYTHING:
+ This tool only returns what you explicitly `print()`.
+
+4. INPUT BAN (HIGHEST PRIORITY):
+ - Never use `input()`.
+
+5. MONTY COMPATIBILITY:
+ - Do NOT use `import json` in code.
+ - Use `json_loads(...)` and `json_dumps(...)` helper functions instead.
+
+EXAMPLE:
+```python
+result = actions.some_method("arg")
+print(result)
+```
+
+SAFE EXECUTION TEMPLATE:
+```python
+raw = actions.some_method("arg")
+data = json_loads(raw) if isinstance(raw, str) and raw[:1] in '{[' else raw
+print(type(data), data)
+```
+
+API REFERENCE:
+The `actions` object is pre-loaded with these methods:
+ """
+
+# ==================== MCP SERVER ====================
+
+# Create the MCP server
+mcp = FastMCP(
+ "Code Sandbox Server",
+ instructions="""You are a Python coding agent for customer service tasks.
+Execute Python code to help customers using the connected tools server.
+
+Use the execute_python tool with code that calls actions.method_name() to interact with the current domain.
+For example: actions.method_name(...)"""
+)
+
+# Global state
+_tools_url: Optional[str] = None
+_available_tools: Dict[str, dict] = {} # tool_name -> {description, parameters}
+_utcp_tools: List[Tool] = [] # UTCP Tool objects for code-mode sandbox
+_session_id: str = "" # Stable session ID for DB isolation (set once at startup)
+_flowise_chat_id: str = "" # chatId explicitly registered via get_session_id
+_session_lock = threading.Lock()
+_tools_refresh_lock = threading.Lock()
+_tools_refresh_interval_sec = 10
+_last_tools_refresh_ts = 0.0
+_last_tools_refresh_error = ""
+_tools_watcher_started = False
+_current_execute_python_description = ""
+_registered_core_tools = False
+_execution_engine = "codemode"
+
+
+def _as_non_empty_str(value: Any) -> str:
+ """Normalize a candidate session value to a non-empty string."""
+ if value is None:
+ return ""
+ text = str(value).strip()
+ if not text or text.lower() in {"none", "null"}:
+ return ""
+ return text
+
+
+def _set_flowise_chat_id(chat_id: str) -> str:
+ """Persist the latest Flowise chatId for execute_python session routing."""
+ normalized = _as_non_empty_str(chat_id)
+ if not normalized:
+ return ""
+ with _session_lock:
+ global _flowise_chat_id
+ _flowise_chat_id = normalized
+ return normalized
+
+
+def _get_effective_session_id() -> Tuple[str, str]:
+ """Resolve session id strictly from get_session_id chatId, with startup fallback."""
+ with _session_lock:
+ chat_id = _flowise_chat_id
+ if chat_id:
+ return chat_id, "flowise-chat-id"
+ return _session_id, "startup-fallback"
+
+
+def _build_execute_python_description() -> str:
+ """Build the current execute_python description from discovered tools + metadata."""
+ tool_metadata: Dict[str, Any] = {}
+ if _tools_url:
+ tool_metadata = asyncio.run(_fetch_remote_tool_metadata(_tools_url, _session_id))
+
+ metadata_full_desc = ""
+ if tool_metadata:
+ maybe_desc = tool_metadata.get("execute_python_description", "")
+ if isinstance(maybe_desc, str):
+ metadata_full_desc = maybe_desc
+
+ if metadata_full_desc:
+ logger.info("Using metadata-provided execute_python description for exact parity")
+ return metadata_full_desc
+
+ actions_desc = _generate_dynamic_description(_available_tools, tool_metadata)
+ base = (
+ EXECUTE_PYTHON_BASE_DESCRIPTION_MONTY
+ if _execution_engine == "monty"
+ else EXECUTE_PYTHON_BASE_DESCRIPTION
+ )
+ return base + "\n\n" + actions_desc
+
+
+def _upsert_execute_python_tool(reason: str = "") -> None:
+ """Register or update execute_python tool description if changed."""
+ global _current_execute_python_description
+
+ if not _available_tools:
+ return
+
+ new_desc = _build_execute_python_description()
+ if not new_desc:
+ return
+
+ if _current_execute_python_description == new_desc:
+ return
+
+ try:
+ mcp.remove_tool("execute_python")
+ except Exception:
+ pass
+
+ mcp.tool(
+ name="execute_python",
+ description=new_desc,
+ exclude_args=["ctx"],
+ )(execute_python)
+ _current_execute_python_description = new_desc
+ logger.info(
+ "Registered/updated execute_python tool description (%s, %d chars)",
+ reason or "refresh",
+ len(new_desc),
+ )
+ logger.info("===== BEGIN execute_python DESCRIPTION (%s) =====", reason or "refresh")
+ logger.info("%s", new_desc)
+ logger.info("===== END execute_python DESCRIPTION =====")
+
+
+def _register_core_tools() -> None:
+ """Register core sandbox tools once."""
+ global _registered_core_tools
+ if _registered_core_tools:
+ return
+ mcp.tool(
+ name="get_session_id",
+ description="Set the runtime session id from Flowise chatId and return it as JSON.",
+ )(get_session_id_from_flowise)
+ _registered_core_tools = True
+ logger.info("Registered get_session_id tool for Flowise chat/session bootstrap")
+
+
+def _refresh_tools_cache(force: bool = False, reason: str = "") -> bool:
+ """Refresh tool discovery cache from tools-server.
+
+ This is resilient by design:
+ - If discovery fails, keep existing cached tools.
+ - If discovery returns empty, keep existing cached tools.
+ """
+ global _utcp_tools, _available_tools, _last_tools_refresh_ts, _last_tools_refresh_error
+
+ if not _tools_url:
+ return False
+
+ now = time.time()
+ if not force and _utcp_tools and (now - _last_tools_refresh_ts) < _tools_refresh_interval_sec:
+ return True
+
+ with _tools_refresh_lock:
+ now = time.time()
+ if not force and _utcp_tools and (now - _last_tools_refresh_ts) < _tools_refresh_interval_sec:
+ return True
+
+ try:
+ discovered_utcp = asyncio.run(discover_mcp_tools(_tools_url))
+ discovered_flat = asyncio.run(discover_tools(_tools_url))
+
+ if discovered_utcp:
+ _utcp_tools = discovered_utcp
+ if discovered_flat:
+ _available_tools = discovered_flat
+
+ if discovered_utcp or discovered_flat:
+ _last_tools_refresh_ts = now
+ _last_tools_refresh_error = ""
+ _upsert_execute_python_tool(reason=reason or "refresh")
+ logger.info(
+ "Refreshed tools cache (%s): utcp=%d, flat=%d",
+ reason or "refresh",
+ len(_utcp_tools),
+ len(_available_tools),
+ )
+ return True
+
+ _last_tools_refresh_error = "no tools discovered"
+ if _utcp_tools:
+ return True
+ logger.warning("Tool refresh returned no tools (%s)", reason or "refresh")
+ return False
+ except Exception as exc:
+ _last_tools_refresh_error = str(exc)
+ if _utcp_tools:
+ logger.warning(
+ "Tool refresh failed (%s), keeping cached tools: %s",
+ reason or "refresh",
+ exc,
+ )
+ return True
+ logger.warning("Tool refresh failed (%s): %s", reason or "refresh", exc)
+ return False
+
+
+def _start_tools_watcher() -> None:
+ """Background watcher that keeps trying to refresh tool cache."""
+ global _tools_watcher_started
+ if _tools_watcher_started:
+ return
+
+ def _worker() -> None:
+ while True:
+ _refresh_tools_cache(force=False, reason="background")
+ time.sleep(_tools_refresh_interval_sec)
+
+ thread = threading.Thread(target=_worker, daemon=True)
+ thread.start()
+ _tools_watcher_started = True
+ logger.info("Started tools watcher (interval=%ss)", _tools_refresh_interval_sec)
+
+
+def get_session_id_from_flowise(sessionId: str = "", chatId: str = "", ctx: Optional[Context] = None) -> str:
+ """Store Flowise chatId and return it as the only runtime session id."""
+ _ = sessionId
+ _ = ctx
+ chat_id_value = _as_non_empty_str(chatId)
+
+ if not chat_id_value:
+ logger.warning("get_session_id called without chatId; keeping previous session id")
+ payload = {
+ "ok": False,
+ "error": "chatId is required",
+ "session_id": _flowise_chat_id or _session_id,
+ "chat_id": _flowise_chat_id,
+ "source": "flowise-bootstrap",
+ }
+ return json.dumps(payload)
+
+ effective_id = _set_flowise_chat_id(chat_id_value)
+
+ payload = {
+ "ok": True,
+ "session_id": effective_id,
+ "chat_id": effective_id,
+ "source": "flowise-bootstrap",
+ }
+ logger.info(
+ "Flowise session bootstrap: sessionId=%s, chatId=%s -> using=%s",
+ sessionId,
+ chatId,
+ effective_id,
+ )
+ return json.dumps(payload)
+
+def execute_python(
+ code: Annotated[
+ str,
+ Field(
+ description=(
+ "Python code to execute. Call tools via actions.method_name(...), "
+ "print outputs you need, and do not use input()."
+ ),
+ min_length=1,
+ max_length=20000,
+ ),
+ ],
+ ctx: Optional[Context] = None,
+) -> str:
+ """Execute Python code with access to tools via the actions object."""
+ _ = ctx
+
+ if _execution_engine == "codemode":
+ if not _utcp_tools:
+ _refresh_tools_cache(force=True, reason="execute_python")
+
+ if not _utcp_tools:
+ return (
+ "[ERROR] Connected tools are not available yet. "
+ "Sandbox is waiting for tools-server at the configured --tools-url. "
+ "Please retry in a few seconds."
+ )
+ else:
+ if not _available_tools:
+ _refresh_tools_cache(force=True, reason="execute_python")
+
+ if not _available_tools:
+ return (
+ "[ERROR] Connected tools are not available yet. "
+ "Sandbox is waiting for tools-server at the configured --tools-url. "
+ "Please retry in a few seconds."
+ )
+
+ session_id, session_source = _get_effective_session_id()
+
+ logger.info(f"========================================")
+ logger.info(
+ f"Engine: {_execution_engine} | Session: {session_id[:12]}... ({session_source}) | Executing code ({len(code)} chars)"
+ )
+ logger.info(f"========================================")
+
+ if not code:
+ return "Error: No code provided. Please provide the python code to execute in the 'code' argument."
+
+ # Create a per-request sandbox with the session ID for DB isolation
+ if _execution_engine == "codemode":
+ sandbox = Sandbox(
+ tools_url=_tools_url,
+ tools=_utcp_tools,
+ session_id=session_id,
+ )
+ else:
+ sandbox = MontySandbox(
+ tools_url=_tools_url,
+ tools_map=_available_tools,
+ session_id=session_id,
+ )
+
+ try:
+ result = sandbox.execute(code)
+
+ output_parts = []
+
+ # Collect printed output (excluding [ERROR] lines to avoid duplication)
+ if result.output:
+ clean_output = "\n".join(
+ line for line in result.output.splitlines()
+ if not line.strip().startswith("[ERROR]")
+ ).strip()
+ if clean_output:
+ output_parts.append(clean_output)
+
+ if not result.success:
+ # Extract the core error message, stripping repeated "Code execution failed:" prefixes
+ error_msg = result.error or "Unknown error"
+ while "Code execution failed: Code execution failed:" in error_msg:
+ error_msg = error_msg.replace("Code execution failed: Code execution failed:",
+ "Code execution failed:", 1)
+ # Also strip the [ERROR] prefix if the library added it
+            error_msg = error_msg.removeprefix("[ERROR]").strip()
+ # Strip one remaining "Code execution failed: " to get to the root cause
+ if error_msg.startswith("Code execution failed: "):
+ error_msg = error_msg[len("Code execution failed: "):]
+
+ # Add SYSTEM HINT if not already present
+ if "SYSTEM HINT:" not in error_msg:
+ hint = _analyze_error_message_with_tools(error_msg, code, session_id=session_id)
+ if hint:
+ error_msg += f"\n\nSYSTEM HINT: {hint}"
+ output_parts.append(f"[ERROR] {error_msg}")
+
+ return "\n".join(output_parts) if output_parts else "(no output)"
+
+ except Exception as e:
+ import traceback
+ tb = traceback.format_exc()
+ logger.error(f"Execution failed: {e}\n{tb}")
+ # Extract root cause
+ root_msg = _unwrap_exception(e)
+ hint = _analyze_error_message_with_tools(root_msg, code, session_id=session_id)
+ error_output = f"[ERROR] {root_msg}"
+ if hint:
+ error_output += f"\n\nSYSTEM HINT: {hint}"
+ return error_output
+
+
+@mcp.tool()
+def list_available_actions() -> str:
+ """List all available actions from the connected tools server.
+
+ Returns:
+ A list of available action methods and their descriptions.
+ """
+ if not _available_tools:
+ _refresh_tools_cache(force=True, reason="list_available_actions")
+ return get_tools_documentation(_available_tools)
+
+
+def main():
+ global _tools_url, _available_tools, _utcp_tools, _session_id, _execution_engine
+
+ parser = argparse.ArgumentParser(description="MCP Sandbox Server (Code-Mode/Monty)")
+ parser.add_argument("--port", type=int, default=5051, help="Port to listen on")
+ parser.add_argument("--host", default="0.0.0.0", help="Host to bind to")
+ parser.add_argument("--tools-url", required=True, help="URL of MCP tools server (SSE)")
+ parser.add_argument(
+ "--engine",
+ choices=["codemode", "monty"],
+ default="codemode",
+ help="Python execution engine to use (default: codemode)",
+ )
+ parser.add_argument("--session-id", default="",
+ help="Fixed session ID for DB isolation. "
+ "If empty (default), auto-generates one at startup. "
+ "All execute_python calls share this single session. "
+ "Restart the server to get a fresh DB.")
+ args = parser.parse_args()
+
+ _tools_url = args.tools_url
+ _execution_engine = args.engine
+
+ if _execution_engine == "codemode" and _CODEMODE_IMPORT_ERROR is not None:
+ raise RuntimeError(
+ "codemode engine selected but utcp_code_mode is unavailable: "
+ f"{_CODEMODE_IMPORT_ERROR}"
+ )
+ if _execution_engine == "monty" and _MONTY_IMPORT_ERROR is not None:
+ raise RuntimeError(
+ "monty engine selected but pydantic_monty is unavailable: "
+ f"{_MONTY_IMPORT_ERROR}"
+ )
+
+ # Set up stable session ID (one per server lifetime)
+ if args.session_id:
+ _session_id = args.session_id
+ else:
+ _session_id = str(uuid4())
+ logger.info(f"Execution engine: {_execution_engine}")
+ logger.info(f"Session ID: {_session_id[:12]}... (all calls share this DB session)")
+
+ # Initial discovery (non-fatal if tools-server is not up yet)
+ logger.info(f"Discovering tools from {_tools_url}...")
+ _refresh_tools_cache(force=True, reason="startup")
+ logger.info(f"Discovered {len(_utcp_tools)} UTCP tools")
+ for tool in sorted(_utcp_tools, key=lambda t: t.name):
+ logger.info(f" - {tool.name}")
+
+ logger.info(f"Discovered {len(_available_tools)} tools for description")
+ for name in sorted(_available_tools.keys()):
+ logger.info(f" - {name}")
+
+ _register_core_tools()
+ _upsert_execute_python_tool(reason="startup")
+
+ # Keep sandbox running and auto-recover when tools-server appears/restarts.
+ _start_tools_watcher()
+
+ # Run the MCP server
+ logger.info(f"Starting MCP sandbox server on {args.host}:{args.port}")
+ mcp.run(transport="sse", host=args.host, port=args.port)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/sample_solutions/AgenticCodeExecution/sandbox-server/requirements.txt b/sample_solutions/AgenticCodeExecution/sandbox-server/requirements.txt
new file mode 100644
index 00000000..cc1243bb
--- /dev/null
+++ b/sample_solutions/AgenticCodeExecution/sandbox-server/requirements.txt
@@ -0,0 +1,5 @@
+fastmcp>=2.0.0
+pydantic>=2.0.0
+pydantic-monty>=0.0.8
+uvicorn
+starlette