Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
f1c6fa5
Start work on run results
aymeric-roucher May 16, 2025
46eb7c8
Create Timing and Usage objects
aymeric-roucher May 16, 2025
f4c8acb
Improve usage class, add property calculations
aymeric-roucher May 19, 2025
bbf194d
Revert deletion of step callbacks after max steps error
aymeric-roucher May 19, 2025
962922f
Rename attributes to token_usage
aymeric-roucher May 19, 2025
8dd6730
Update gradio UI for token usage
aymeric-roucher May 19, 2025
b076d11
Merge branch 'main' into add-run-results
aymeric-roucher May 19, 2025
795f76e
Fix gradio chatbot as much as possible
aymeric-roucher May 19, 2025
c7be43b
Fix gradio chatbot by escaping HTML tags
aymeric-roucher May 19, 2025
3446f7e
Pass monitoring tests
aymeric-roucher May 19, 2025
816466a
Pass more tests
aymeric-roucher May 19, 2025
9343611
Revert default LLM upgrade
aymeric-roucher May 19, 2025
7a944b3
Remove sleep
aymeric-roucher May 19, 2025
4de35e9
Re-add last_input_token_count attribute for Model
aymeric-roucher May 20, 2025
d476420
Add tests
aymeric-roucher May 20, 2025
f93846c
Pass tests
aymeric-roucher May 20, 2025
eefc8a8
Pass memory test
aymeric-roucher May 20, 2025
55bd6c7
Pass agents test
aymeric-roucher May 20, 2025
3b796d9
Revert model change in GAIA
aymeric-roucher May 20, 2025
4e6b593
Update src/smolagents/models.py
aymeric-roucher May 20, 2025
4c92beb
Update src/smolagents/monitoring.py
aymeric-roucher May 20, 2025
a87ef70
Update src/smolagents/monitoring.py
aymeric-roucher May 20, 2025
18b1849
Revert suggestion to avoid None durations
aymeric-roucher May 20, 2025
582a09e
Use post-init suggestion for TokenUsage, property for Timing
aymeric-roucher May 20, 2025
e25715e
Re-add deprecated token count increment in stream methods
aymeric-roucher May 20, 2025
3a321e7
Fix dict conversion error
aymeric-roucher May 20, 2025
6dc4c6d
Test ActionStep.dict()
aymeric-roucher May 20, 2025
56117ad
Fix edge case
aymeric-roucher May 20, 2025
dbdb3fa
Fix even more tests
aymeric-roucher May 20, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 13 additions & 9 deletions examples/agent_from_any_llm.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,23 @@
from smolagents import InferenceClientModel, LiteLLMModel, OpenAIServerModel, TransformersModel, tool
from smolagents.agents import CodeAgent, ToolCallingAgent
from smolagents import (
CodeAgent,
InferenceClientModel,
LiteLLMModel,
OpenAIServerModel,
ToolCallingAgent,
TransformersModel,
tool,
)


# Choose which inference type to use!

available_inferences = ["hf_api", "hf_api_provider", "transformers", "ollama", "litellm", "openai"]
chosen_inference = "hf_api_provider"
available_inferences = ["inference_client", "transformers", "ollama", "litellm", "openai"]
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Aligns this with the new name since #1198.

chosen_inference = "inference_client"

print(f"Chose model: '{chosen_inference}'")

if chosen_inference == "hf_api":
model = InferenceClientModel(model_id="meta-llama/Llama-3.3-70B-Instruct")

elif chosen_inference == "hf_api_provider":
model = InferenceClientModel(provider="together")
if chosen_inference == "inference_client":
model = InferenceClientModel(model_id="meta-llama/Llama-3.3-70B-Instruct", provider="nebius")
Copy link
Collaborator Author

@aymeric-roucher aymeric-roucher May 19, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Specify provider "nebius" since they don't error out when using tool_call="required"


elif chosen_inference == "transformers":
model = TransformersModel(model_id="HuggingFaceTB/SmolLM2-1.7B-Instruct", device_map="auto", max_new_tokens=1000)
Expand Down
3 changes: 2 additions & 1 deletion examples/gradio_ui.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
name="example_agent",
description="This is an example agent.",
step_callbacks=[],
stream_outputs=False,
stream_outputs=True,
return_full_result=True,
)

GradioUI(agent, file_upload_folder="./data").launch()
10 changes: 8 additions & 2 deletions examples/inspect_multiagent_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,18 +16,24 @@


# Then we run the agentic part!
model = InferenceClientModel()
model = InferenceClientModel(provider="nebius")

search_agent = ToolCallingAgent(
tools=[WebSearchTool(), VisitWebpageTool()],
model=model,
name="search_agent",
description="This is an agent that can do web search.",
return_full_result=True,
)

manager_agent = CodeAgent(
tools=[],
model=model,
managed_agents=[search_agent],
return_full_result=True,
)
manager_agent.run("If the US keeps it 2024 growth rate, how many years would it take for the GDP to double?")
run_result = manager_agent.run(
    "If the US keeps its 2024 growth rate, how many years would it take for the GDP to double?"
)
print("Here is the token usage for the manager agent", run_result.token_usage)
print("Here is the timing information for the manager agent:", run_result.timing)
6 changes: 4 additions & 2 deletions examples/multi_llm_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@
model_list=llm_loadbalancer_model_list,
client_kwargs={"routing_strategy": "simple-shuffle"},
)
agent = CodeAgent(tools=[WebSearchTool()], model=model, stream_outputs=True)
agent = CodeAgent(tools=[WebSearchTool()], model=model, stream_outputs=True, return_full_result=True)

agent.run("How many seconds would it take for a leopard at full speed to run through Pont des Arts?")
full_result = agent.run("How many seconds would it take for a leopard at full speed to run through Pont des Arts?")

print(full_result)
Loading