Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .cursorignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,7 @@
# Add directories or file patterns to ignore during indexing (e.g. foo/ or *.csv)

assets/
benchmarks/
examples/
openarc_bench.db
openarc.log
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,6 @@ docker-compose.override.yaml
.vscode/
.builds/
.cursor/
openarc_bench.db
openarc_bench.db
gpt-oss.ipynb
gpt_oss_convert.py
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -275,22 +275,22 @@ Review [pipeline-paralellism preview](https://docs.openvino.ai/2025/openvino-wor
### Multi-GPU Pipeline Parallel

```
openarc add --model-name <model-name> --model-path <path/to/model> --engine ovgenai --model-type llm --device <HETERO:GPU.0,GPU.1> --runtime-config {"MODEL_DISTRIBUTION_POLICY": "PIPELINE_PARALLEL"}
openarc add --model-name <model-name> --model-path <path/to/model> --engine ovgenai --model-type llm --device HETERO:GPU.0,GPU.1 --runtime-config '{"MODEL_DISTRIBUTION_POLICY": "PIPELINE_PARALLEL"}'
```

### Tensor Parallel (CPU only)

Requires more than one CPU socket in a single node.

```
openarc add --model-name <model-name> --model-path <path/to/model> --engine ovgenai --model-type llm --device CPU --runtime-config {"MODEL_DISTRIBUTION_POLICY": "TENSOR_PARALLEL"}
openarc add --model-name <model-name> --model-path <path/to/model> --engine ovgenai --model-type llm --device CPU --runtime-config '{"MODEL_DISTRIBUTION_POLICY": "TENSOR_PARALLEL"}'
```
---

### Hybrid Mode/CPU Offload

```
openarc add --model-name <model-name> -model-path <path/to/model> --engine ovgenai --model-type llm --device <HETERO:GPU.0,CPU> --runtime-config {"MODEL_DISTRIBUTION_POLICY": "PIPELINE_PARALLEL"}
openarc add --model-name <model-name> --model-path <path/to/model> --engine ovgenai --model-type llm --device HETERO:GPU.0,CPU --runtime-config '{"MODEL_DISTRIBUTION_POLICY": "PIPELINE_PARALLEL"}'
```

</details>
Expand Down
157 changes: 157 additions & 0 deletions demos/hf_explorer/hf_agent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
import os
import sys
import json
from openai import OpenAI
from hf_tools import search_huggingface

# Initialize OpenAI client with OpenArc API.
# OPENARC_API_KEY is read from the environment; the base_url points the
# OpenAI SDK at a local OpenArc server exposing an OpenAI-compatible API.
client = OpenAI(
    api_key=os.getenv("OPENARC_API_KEY"),
    base_url="http://localhost:8000/v1"
)

# Model identifier sent with every chat request — presumably the name a
# model was registered under in OpenArc; verify against the server config.
MODEL = "Qwen3-4B-2507"

# JSON-schema parameter description for the search_huggingface tool,
# kept as a named constant so the tool list below stays readable.
_SEARCH_HF_PARAMETERS = {
    "type": "object",
    "properties": {
        "query": {
            "type": "string",
            "description": "The search query string to find models or datasets",
        },
        "search_type": {
            "type": "string",
            "enum": ["model", "dataset"],
            "description": "Whether to search for models or datasets",
            "default": "model",
        },
        "limit": {
            "type": "integer",
            "description": "Maximum number of results to return",
            "default": 10,
        },
    },
    "required": ["query"],
}

# Tool schema advertised to the model for OpenAI function calling.
tools = [
    {
        "type": "function",
        "function": {
            "name": "search_huggingface",
            "description": (
                "Search the Hugging Face Hub for models or datasets. "
                "Returns a list of matching items with metadata."
            ),
            "parameters": _SEARCH_HF_PARAMETERS,
        },
    },
]


def execute_tool_call(tool_call):
    """Execute a single tool call from the model and return its result.

    The result is a JSON string: a list of per-hit summaries on success,
    or an error object when the requested function is not recognized.
    """
    name = tool_call.function.name
    kwargs = json.loads(tool_call.function.arguments)

    # Guard clause: anything other than the one known tool is an error.
    if name != "search_huggingface":
        return json.dumps({"error": "Unknown function"})

    hits = search_huggingface(**kwargs)

    # Condense each hit to just the fields the LLM needs; the optional
    # attributes are included only when present on the result object.
    summaries = []
    for hit in hits:
        summary = {"id": hit.id}
        if hasattr(hit, 'downloads'):
            summary['downloads'] = hit.downloads
        if hasattr(hit, 'likes'):
            summary['likes'] = hit.likes
        if hasattr(hit, 'tags'):
            # Limit tags so the tool output stays compact.
            summary['tags'] = hit.tags[:5] if hit.tags else []
        summaries.append(summary)

    return json.dumps(summaries, indent=2)


def chat_loop():
    """Interactive chat loop with the agent.

    Reads user input, sends the running conversation to the model, executes
    any requested tool calls, and keeps resolving tool rounds until the
    model produces a plain-text answer.
    """
    messages = [
        {
            "role": "system",
            "content": "You are a helpful assistant that can search the Hugging Face Hub for models and datasets. When users ask about models or datasets, use the search_huggingface function to find relevant results."
        }
    ]

    print("HuggingFace Explorer Agent")
    print("=" * 50)
    print("Ask me to search for models or datasets on HuggingFace!")
    print("Type 'exit' or 'quit' to end the conversation.\n")

    while True:
        # Get user input
        user_input = input("You: ").strip()

        if user_input.lower() in ['exit', 'quit', 'q']:
            print("Goodbye!")
            break

        if not user_input:
            continue

        # Add user message
        messages.append({"role": "user", "content": user_input})

        # Keep calling the model until it answers without requesting tools.
        # (A single fixed tool round would print `None` and drop any tool
        # calls the model chains in its follow-up response.)
        while True:
            response = client.chat.completions.create(
                model=MODEL,
                messages=messages,
                tools=tools,
                tool_choice="auto"
            )

            response_message = response.choices[0].message
            messages.append(response_message)

            if not response_message.tool_calls:
                # Plain-text answer: print it and return to the user prompt.
                print(f"\nAssistant: {response_message.content}\n")
                break

            # Execute every tool call and feed results back to the model.
            for tool_call in response_message.tool_calls:
                print(f"\n[Calling: {tool_call.function.name}]")
                result = execute_tool_call(tool_call)

                # Add tool result to messages
                messages.append({
                    "role": "tool",
                    "tool_call_id": tool_call.id,
                    "content": result
                })


def main():
    """Main entrypoint."""
    try:
        chat_loop()
    except KeyboardInterrupt:
        # Ctrl-C is a normal way to leave the chat; exit cleanly.
        print("\n\nGoodbye!")
        sys.exit(0)
    except Exception as err:
        # Any other failure is reported and mapped to a non-zero exit code.
        print(f"\nError: {err}")
        sys.exit(1)


if __name__ == "__main__":
    main()

55 changes: 55 additions & 0 deletions demos/hf_explorer/hf_tools.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import os
from huggingface_hub import HfApi

# Hugging Face access token read from the environment; may be None when
# HF_TOKEN is unset (anonymous Hub access — confirm rate limits apply).
hf_api_key = os.getenv("HF_TOKEN")

def search_huggingface(query: str, search_type: str = "model", limit: int = 10):
    """
    Search the Hugging Face Hub for models or datasets.

    Args:
        query: Search query string
        search_type: Either "model" or "dataset"
        limit: Maximum number of results to return

    Returns:
        List of search results with metadata

    Raises:
        ValueError: If search_type is not "model" or "dataset".
    """
    # Validate the argument up front so a bad search_type fails fast,
    # before any client construction or network work is done.
    if search_type not in ("model", "dataset"):
        raise ValueError("search_type must be 'model' or 'dataset'")

    api = HfApi(token=hf_api_key)

    if search_type == "model":
        results = api.list_models(search=query, limit=limit)
    else:
        results = api.list_datasets(search=query, limit=limit)

    # list_models/list_datasets return iterators; materialize for callers.
    return list(results)


def main():
    """Main entrypoint for the HF explorer CLI."""
    import sys

    # Positional arguments: <query> [search_type] [limit]
    args = sys.argv[1:]
    if not args:
        print("Usage: python hf_tools.py <query> [search_type] [limit]")
        print(" search_type: 'model' (default) or 'dataset'")
        print(" limit: number of results (default: 10)")
        sys.exit(1)

    query = args[0]
    search_type = args[1] if len(args) > 1 else "model"
    limit = int(args[2]) if len(args) > 2 else 10

    results = search_huggingface(query, search_type, limit)

    print(f"\nSearch results for '{query}' ({search_type}s):\n")
    for idx, item in enumerate(results, 1):
        print(f"{idx}. {item.id}")
        if hasattr(item, 'downloads'):
            print(f" Downloads: {item.downloads}")
        print()


if __name__ == "__main__":
    main()
56 changes: 56 additions & 0 deletions demos/smolagents/smolagent_python_interpreter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
import os
from smolagents import CodeAgent, LiteLLMModel
from smolagents.default_tools import PythonInterpreterTool


def main():
    """Main entrypoint for interactive smolagent."""
    # Route requests through OpenArc's OpenAI-compatible endpoint via LiteLLM.
    model = LiteLLMModel(
        model_id="openai/Muse-12B",
        api_key=os.getenv("OPENARC_API_KEY"),
        api_base="http://localhost:8000/v1"
    )

    # The agent is deliberately restricted to a single capability:
    # executing Python code via the interpreter tool.
    agent = CodeAgent(
        tools=[PythonInterpreterTool()],
        model=model,
        max_steps=10
    )

    print("🤖 Smolagent with Python Interpreter")
    print("=" * 60)
    print("I can execute Python code!")
    print("Type 'exit' or 'quit' to end the conversation.\n")

    while True:
        try:
            prompt = input("You: ").strip()

            if prompt.lower() in ['exit', 'quit', 'q']:
                print("Goodbye!")
                break

            if not prompt:
                continue

            print("\n🔄 Processing...\n")
            answer = agent.run(prompt)
            print(f"\n🤖 Assistant: {answer}\n")

        except KeyboardInterrupt:
            # Ctrl-C anywhere in the loop ends the session.
            print("\n\nGoodbye!")
            break
        except Exception as err:
            # Report the failure and keep the session alive.
            print(f"\n❌ Error: {err}\n")
            import traceback
            traceback.print_exc()

if __name__ == "__main__":
    main()
Loading