diff --git a/camel/agents/chat_agent.py b/camel/agents/chat_agent.py index 93774568d6..6047893896 100644 --- a/camel/agents/chat_agent.py +++ b/camel/agents/chat_agent.py @@ -495,7 +495,13 @@ def __init__( self.model_backend.token_counter, self.model_backend.token_limit, ) - + # check the memory is a class type, then instantiate it + if memory is not None and inspect.isclass(memory): + memory = memory( + context_creator, + window_size=message_window_size, + agent_id=self.agent_id, + ) self._memory: AgentMemory = memory or ChatHistoryMemory( context_creator, window_size=message_window_size, diff --git a/camel/memories/__init__.py b/camel/memories/__init__.py index 44dbae4059..2643c48a40 100644 --- a/camel/memories/__init__.py +++ b/camel/memories/__init__.py @@ -16,6 +16,7 @@ ChatHistoryMemory, LongtermAgentMemory, VectorDBMemory, + BrowserChatHistoryMemory, ) from .base import AgentMemory, BaseContextCreator, MemoryBlock from .blocks.chat_history_block import ChatHistoryBlock @@ -32,6 +33,7 @@ 'ScoreBasedContextCreator', 'ChatHistoryMemory', 'VectorDBMemory', + 'BrowserChatHistoryMemory', 'ChatHistoryBlock', 'VectorDBBlock', 'LongtermAgentMemory', diff --git a/camel/memories/agent_memories.py b/camel/memories/agent_memories.py index 4fff306d66..4bb4e38453 100644 --- a/camel/memories/agent_memories.py +++ b/camel/memories/agent_memories.py @@ -14,6 +14,7 @@ import warnings from typing import List, Optional +import re from camel.memories.base import AgentMemory, BaseContextCreator from camel.memories.blocks import ChatHistoryBlock, VectorDBBlock @@ -21,6 +22,7 @@ from camel.storages.key_value_storages.base import BaseKeyValueStorage from camel.storages.vectordb_storages.base import BaseVectorStorage from camel.types import OpenAIBackendRole +from camel.messages.func_message import FunctionCallingMessage class ChatHistoryMemory(AgentMemory): @@ -319,3 +321,171 @@ def remove_records_by_indices( ) -> List[MemoryRecord]: r"""Removes records at specified indices from chat 
class BrowserChatHistoryMemory(AgentMemory):
    r"""A browser-agent memory wrapper of :obj:`ChatHistoryBlock`.

    It stores and retrieves chat history like a plain chat-history memory.
    In addition, when :obj:`prune_history_tool_calls` is enabled,
    :meth:`retrieve` condenses the web content carried by past tool results
    (every retrieved record except the most recent one) with
    :meth:`clean_lines` to save tokens.

    Args:
        context_creator (BaseContextCreator): A model context creator.
        storage (BaseKeyValueStorage, optional): A storage backend for storing
            chat history. It is passed to :obj:`ChatHistoryBlock` as is.
            (default: :obj:`None`)
        window_size (int, optional): The number of recent chat messages to
            retrieve. If not provided, the entire chat history will be
            retrieved. (default: :obj:`None`)
        agent_id (str, optional): The ID of the agent associated with the chat
            history. (default: :obj:`None`)
        prune_history_tool_calls (bool, optional): Whether :meth:`retrieve`
            condenses the tool results of past messages.
            (default: :obj:`True`)

    Raises:
        TypeError: If :obj:`window_size` is neither an integer nor `None`.
        ValueError: If :obj:`window_size` is negative.
    """

    # Patterns used by `clean_lines`, compiled once instead of per line.
    _BRACKETED_RE = re.compile(r'\[.*?\]')
    _URL_LINE_RE = re.compile(r'^-\s*/url:\s*(.+)')
    _LABEL_RE = re.compile(r'^-\s*[^:\s]+\s*:? ?')
    _TRAILING_RE = re.compile(r'[:\s]+$')
    _EDGE_NOISE_RE = re.compile(r'^[^A-Za-z0-9]+|[^A-Za-z0-9]+$')

    def __init__(
        self,
        context_creator: BaseContextCreator,
        storage: Optional[BaseKeyValueStorage] = None,
        window_size: Optional[int] = None,
        agent_id: Optional[str] = None,
        prune_history_tool_calls: bool = True,
    ) -> None:
        if window_size is not None and not isinstance(window_size, int):
            raise TypeError("`window_size` must be an integer or None.")
        if window_size is not None and window_size < 0:
            raise ValueError("`window_size` must be non-negative.")
        self._context_creator = context_creator
        self._window_size = window_size
        self._chat_history_block = ChatHistoryBlock(
            storage=storage,
        )
        self._agent_id = agent_id
        self.prune_history_tool_calls = prune_history_tool_calls

    @property
    def agent_id(self) -> Optional[str]:
        r"""The ID of the agent this memory belongs to, if any."""
        return self._agent_id

    @agent_id.setter
    def agent_id(self, val: Optional[str]) -> None:
        self._agent_id = val

    def clean_lines(self, content: str) -> str:
        r"""Keeps only the meaningful text of a raw web snapshot.

        Each line is stripped, bracketed annotations are removed, a
        ``- /url: ...`` line is reduced to its URL, any other leading
        ``- label:`` prefix is dropped, and trailing colons as well as
        leading/trailing non-alphanumeric characters are trimmed. Lines that
        end up empty and repeated lines are discarded.

        Args:
            content (str): The raw web content from the browser toolkit to
                be cleaned.

        Returns:
            str: The unique cleaned lines joined by newlines, in order of
                first appearance. An empty string if :obj:`content` is
                empty.
        """
        if not content:
            return ""
        # A dict works as an insertion-ordered set, so the cleaned text is
        # deterministic and keeps the page's reading order. A plain set would
        # shuffle the lines differently on every run.
        unique_lines = {}
        for raw_line in content.splitlines():
            line = self._BRACKETED_RE.sub('', raw_line.strip())
            url_match = self._URL_LINE_RE.match(line)
            if url_match:
                line = url_match.group(1)
            else:
                line = self._LABEL_RE.sub('', line)
            line = self._TRAILING_RE.sub('', line)
            line = self._EDGE_NOISE_RE.sub('', line)
            if line.strip():
                unique_lines[line] = None
        return '\n'.join(unique_lines)

    def _prune_tool_result(self, memory_record: MemoryRecord) -> None:
        r"""Condenses the web content of one function-result record in place.

        Records that are not function results, and results whose payload is
        neither a string nor a dict with a string ``"snapshot"`` entry, are
        left untouched.
        """
        if memory_record.role_at_backend != OpenAIBackendRole.FUNCTION:
            return
        message = memory_record.message
        if not isinstance(message, FunctionCallingMessage):
            return
        result = message.result
        if isinstance(result, str):
            message.result = self.clean_lines(result)
        elif isinstance(result, dict):
            # Only rewrite a snapshot that is really there; never inject an
            # empty "snapshot" key into results that did not carry one.
            snapshot = result.get("snapshot")
            if isinstance(snapshot, str):
                result["snapshot"] = self.clean_lines(snapshot)
        # Any other payload (None, list, number, ...) has nothing to prune.
        # It is passed through instead of raising, so that a single tool
        # with an unusual return type cannot break context retrieval.

    def retrieve(self) -> List[ContextRecord]:
        r"""Retrieves context records from the chat history, optionally
        condensing the tool results of past messages.

        When :obj:`prune_history_tool_calls` is enabled, the result of every
        retrieved function-result record except the last one is rewritten on
        the retrieved message object via :meth:`clean_lines`. The most recent
        record is always returned as is.

        Returns:
            List[ContextRecord]: The retrieved records, in the order returned
                by the chat history block.
        """
        records = self._chat_history_block.retrieve(self._window_size)
        if self._window_size is not None and len(records) == self._window_size:
            warnings.warn(
                f"Chat history window size limit ({self._window_size}) "
                f"reached. Some earlier messages will not be included in "
                f"the context. Consider increasing window_size if you need "
                f"a longer context.",
                UserWarning,
                stacklevel=2,
            )
        # Nothing to prune when pruning is disabled or the history is empty;
        # in both cases the records must still be returned.
        if not self.prune_history_tool_calls or not records:
            return records
        # Only past messages are pruned; the latest one keeps its full
        # content so the model sees the current page unabridged.
        for record in records[:-1]:
            self._prune_tool_result(record.memory_record)
        return records

    def write_records(self, records: List[MemoryRecord]) -> None:
        r"""Writes records to the chat history.

        Records with an empty :obj:`agent_id` are stamped with this memory's
        agent ID (when one is set) before being written.

        Args:
            records (List[MemoryRecord]): The records to store.
        """
        for record in records:
            # assign the agent_id to the record
            if record.agent_id == "" and self.agent_id is not None:
                record.agent_id = self.agent_id
        self._chat_history_block.write_records(records)

    def get_context_creator(self) -> BaseContextCreator:
        r"""Returns the context creator given at construction time."""
        return self._context_creator

    def clear(self) -> None:
        r"""Clears the underlying chat history block."""
        self._chat_history_block.clear()

    def clean_tool_calls(self) -> None:
        r"""Removes tool call messages from memory.

        This method removes all FUNCTION/TOOL role messages and any ASSISTANT
        messages that contain tool_calls in their meta_dict to save token
        usage.
        """
        storage = self._chat_history_block.storage
        record_dicts = storage.load()
        if not record_dicts:
            return

        tool_roles = (
            OpenAIBackendRole.FUNCTION.value,
            OpenAIBackendRole.TOOL.value,
        )

        def is_tool_related(record_dict: dict) -> bool:
            role = record_dict.get('role_at_backend')
            if role in tool_roles:
                return True
            if role == OpenAIBackendRole.ASSISTANT.value:
                meta_dict = record_dict.get('meta_dict', {})
                return bool(meta_dict and 'tool_calls' in meta_dict)
            return False

        kept_records = [
            record_dict
            for record_dict in record_dicts
            if not is_tool_related(record_dict)
        ]

        # NOTE(review): the built-in key-value storages appear to append on
        # `save()`; clearing first makes the filtered list replace the
        # stored history instead of being added after it. Confirm against
        # the storage backend in use.
        storage.clear()
        storage.save(kept_records)
# See the License for the specific language governing permissions and
# limitations under the License.
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
import asyncio
import os
import uuid
from typing import Tuple

from camel.agents.chat_agent import ChatAgent
from camel.logger import get_logger
from camel.memories import BrowserChatHistoryMemory
from camel.messages.base import BaseMessage
from camel.models import BaseModelBackend, ModelFactory
from camel.toolkits import HybridBrowserToolkit, ToolkitMessageIntegration
from camel.types import ModelPlatformType, ModelType

logger = get_logger(__name__)
WORKING_DIRECTORY = os.environ.get("CAMEL_WORKDIR") or os.path.abspath(
    "working_dir/"
)


def send_message_to_user(
    message_title: str,
    message_description: str,
    message_attachment: str = "",
) -> str:
    r"""Use this tool to send a tidy message to the user, including a
    short title, a one-sentence description, and an optional attachment.

    This one-way tool keeps the user informed about your progress,
    decisions, or actions. It does not require a response.
    You should use it to:
    - Announce what you are about to do.
      For example:
      message_title="Starting Task"
      message_description="Searching for papers on GUI Agents."
    - Report the result of an action.
      For example:
      message_title="Search Complete"
      message_description="Found 15 relevant papers."
    - Report a created file.
      For example:
      message_title="File Ready"
      message_description="The report is ready for your review."
      message_attachment="report.pdf"
    - State a decision.
      For example:
      message_title="Next Step"
      message_description="Analyzing the top 10 papers."
    - Give a status update during a long-running task.

    Args:
        message_title (str): The title of the message.
        message_description (str): The short description.
        message_attachment (str): The attachment of the message,
            which can be a file path or a URL.

    Returns:
        str: Confirmation that the message was successfully sent.
    """
    print(f"\nAgent Message:\n{message_title} \n{message_description}\n")
    if message_attachment:
        print(message_attachment)
    logger.info(
        f"\nAgent Message:\n{message_title} "
        f"{message_description} {message_attachment}"
    )
    return (
        f"Message successfully sent to user: '{message_title} "
        f"{message_description} {message_attachment}'"
    )


def search_agent_factory(
    model: BaseModelBackend,
    task_id: str,
) -> Tuple[ChatAgent, HybridBrowserToolkit]:
    r"""Factory for creating a search agent with a browser toolkit and a
    :obj:`BrowserChatHistoryMemory`.

    Args:
        model (BaseModelBackend): The model backend driving the agent.
        task_id (str): Identifier of the task. Currently unused; kept so
            existing callers keep working.

    Returns:
        Tuple[ChatAgent, HybridBrowserToolkit]: The agent and the browser
            toolkit registered with it. The toolkit is returned so the
            caller can close the browser when done.
    """
    # Initialize message integration
    message_integration = ToolkitMessageIntegration(
        message_handler=send_message_to_user
    )

    # Generate a unique identifier for this agent instance
    agent_id = str(uuid.uuid4())[:8]

    custom_tools = [
        "browser_open",
        "browser_select",
        "browser_close",
        "browser_back",
        "browser_forward",
        "browser_click",
        "browser_type",
        "browser_enter",
        "browser_switch_tab",
        "browser_visit_page",
        "browser_get_page_snapshot",
    ]
    user_data_dir = os.getenv("CAMEL_BROWSER_USER_DATA_DIR", "user_data")
    web_toolkit_custom = HybridBrowserToolkit(
        headless=False,
        enabled_tools=custom_tools,
        browser_log_to_file=True,
        stealth=True,
        session_id=agent_id,
        viewport_limit=False,
        cache_dir=WORKING_DIRECTORY,
        default_start_url="about:blank",
        user_data_dir=user_data_dir,
        log_dir=os.getenv("CAMEL_BROWSER_USER_LOG_DIR", "browser_logs"),
    )

    # Add messaging to toolkits
    web_toolkit_custom = message_integration.register_toolkits(
        web_toolkit_custom
    )

    tools = [
        *web_toolkit_custom.get_tools(),
    ]

    system_message = """

You are a Senior Research Analyst, a key member of a multi-agent team. Your
primary responsibility is to conduct expert-level web research to gather,
analyze, and document information required to solve the user's task. You
operate with precision, efficiency, and a commitment to data quality.



- You MUST use the note-taking tools to record your findings. This is a
    critical part of your role. Your notes are the primary source of
    information for your teammates. To avoid information loss, you must not
    summarize your findings. Instead, record all information in detail.
    For every piece of information you gather, you must:
    1. **Extract ALL relevant details**: Quote all important sentences,
        statistics, or data points. Your goal is to capture the information
        as completely as possible.
    2. **Cite your source**: Include the exact URL where you found the
        information.
    Your notes should be a detailed and complete record of the information
    you have discovered. High-quality, detailed notes are essential for the
    team's success.

- When you complete your task, your final response must be a comprehensive
    summary of your findings, presented in a clear, detailed, and
    easy-to-read format. Avoid using markdown tables for presenting data;
    use plain text formatting instead.



Your capabilities include:
- Search and get information from the web using the search tools.
- Use the rich browser related toolset to investigate websites.



- Browser-Based Exploration: Use the rich browser related toolset to
    investigate websites.

    - **Navigation and Exploration**: Use `browser_visit_page` to open a URL.
        `browser_visit_page` provides a snapshot of currently visible
        interactive elements, not the full page text. To see more content on
        long pages, Navigate with `browser_click`, `browser_back`, and
        `browser_forward`. Manage multiple pages with `browser_switch_tab`.
    - **Analysis**: Use `browser_get_som_screenshot` to understand the page
        layout and identify interactive elements. Since this is a heavy
        operation, only use it when visual analysis is necessary.
    - **Interaction**: Use `browser_type` to fill out forms and
        `browser_enter` to submit or confirm search.

- In your response, you should mention the URLs you have visited and processed.

- When encountering verification challenges (like login, CAPTCHAs or
    robot checks), you MUST request help using the human toolkit.
- When encountering cookies page, you need to click accept all.

"""
    agent = ChatAgent(
        system_message=BaseMessage.make_assistant_message(
            role_name="Search Agent",
            content=system_message,
        ),
        model=model,
        toolkits_to_register_agent=[web_toolkit_custom],
        tools=tools,
        # The memory class itself is passed; the agent instantiates it.
        memory=BrowserChatHistoryMemory,
    )

    # Return both agent and toolkit for cleanup purposes
    return agent, web_toolkit_custom


async def main() -> None:
    r"""Runs the search agent on one browsing task and prints the pruned
    memory context, then closes the browser.
    """
    model_backend = ModelFactory.create(
        model_platform=ModelPlatformType.OPENAI,
        model_type=ModelType.GPT_4_1,
        model_config_dict={
            "stream": False,
        },
    )
    search_agent, browser_toolkit = search_agent_factory(
        model_backend, task_id="1"
    )
    try:
        # The trailing space matters: adjacent string literals are joined
        # verbatim, and without it the URL would absorb the next word.
        task_message = (
            "web: https://en.wikipedia.org/wiki/Agent "
            "got this web and get the summary of it"
        )
        await search_agent.astep(input_message=task_message)
        # get_context() yields (messages, token_count)
        messages, _token_count = search_agent.memory.get_context()

        print("\n--- Agent Pruned Memory Context ---")
        # A typical run yields 7 messages: system, user, browse tool call,
        # browser content response, snapshot tool call, snapshot response
        # and the agent's final response. Iterate instead of indexing so a
        # shorter or longer run does not raise.
        for index, message in enumerate(messages):
            role = message.get("role")
            print(f"[{index}] {role}: {message.get('content')}")
    finally:
        # IMPORTANT: Close browser after each task to prevent resource leaks
        try:
            print("\n--- Closing Browser for Task ---")
            await browser_toolkit.browser_close()
            print("Browser closed successfully.")
        except Exception as e:
            # Best-effort cleanup: report the failure without masking an
            # error raised by the task itself.
            print(f"Error closing browser: {e}")


if __name__ == "__main__":
    logger.info("Starting Search Agent with Browser Call Cache...")
    asyncio.run(main())
    logger.info("Search Agent run completed.")