Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .cursorignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,7 @@
# Add directories or file patterns to ignore during indexing (e.g. foo/ or *.csv)

assets/
benchmarks/
examples/
openarc_bench.db
openarc.log
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,6 @@ docker-compose.override.yaml
.vscode/
.builds/
.cursor/
openarc_bench.db
openarc_bench.db
gpt-oss.ipynb
gpt_oss_convert.py
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -275,22 +275,22 @@ Review [pipeline-paralellism preview](https://docs.openvino.ai/2025/openvino-wor
### Multi-GPU Pipeline Parallel

```
openarc add --model-name <model-name> --model-path <path/to/model> --engine ovgenai --model-type llm --device <HETERO:GPU.0,GPU.1> --runtime-config {"MODEL_DISTRIBUTION_POLICY": "PIPELINE_PARALLEL"}
openarc add --model-name <model-name> --model-path <path/to/model> --engine ovgenai --model-type llm --device HETERO:GPU.0,GPU.1 --runtime-config '{"MODEL_DISTRIBUTION_POLICY": "PIPELINE_PARALLEL"}'
```

### Tensor Parallel (CPU only)

Requires more than one CPU socket in a single node.

```
openarc add --model-name <model-name> --model-path <path/to/model> --engine ovgenai --model-type llm --device CPU --runtime-config {"MODEL_DISTRIBUTION_POLICY": "TENSOR_PARALLEL"}
openarc add --model-name <model-name> --model-path <path/to/model> --engine ovgenai --model-type llm --device CPU --runtime-config '{"MODEL_DISTRIBUTION_POLICY": "TENSOR_PARALLEL"}'
```
---

### Hybrid Mode/CPU Offload

```
openarc add --model-name <model-name> -model-path <path/to/model> --engine ovgenai --model-type llm --device <HETERO:GPU.0,CPU> --runtime-config {"MODEL_DISTRIBUTION_POLICY": "PIPELINE_PARALLEL"}
openarc add --model-name <model-name> --model-path <path/to/model> --engine ovgenai --model-type llm --device HETERO:GPU.0,CPU --runtime-config '{"MODEL_DISTRIBUTION_POLICY": "PIPELINE_PARALLEL"}'
```

</details>
Expand Down
157 changes: 157 additions & 0 deletions demos/hf_explorer/hf_agent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
import os
import sys
import json
from openai import OpenAI
from hf_tools import search_huggingface

# Initialize OpenAI client with OpenArc API.
# OPENARC_API_KEY is read from the environment; the base_url points the
# OpenAI SDK at a local OpenArc server exposing an OpenAI-compatible API.
client = OpenAI(
    api_key=os.getenv("OPENARC_API_KEY"),
    base_url="http://localhost:8000/v1"
)

# Model identifier sent with every chat request — presumably the name a
# model was registered under in OpenArc; verify against the server config.
MODEL = "Qwen3-4B-2507"

# JSON-schema parameter description for the search_huggingface tool,
# kept as a named constant so the tool list below stays readable.
_SEARCH_HF_PARAMETERS = {
    "type": "object",
    "properties": {
        "query": {
            "type": "string",
            "description": "The search query string to find models or datasets",
        },
        "search_type": {
            "type": "string",
            "enum": ["model", "dataset"],
            "description": "Whether to search for models or datasets",
            "default": "model",
        },
        "limit": {
            "type": "integer",
            "description": "Maximum number of results to return",
            "default": 10,
        },
    },
    "required": ["query"],
}

# Tool schema advertised to the model for OpenAI function calling.
tools = [
    {
        "type": "function",
        "function": {
            "name": "search_huggingface",
            "description": (
                "Search the Hugging Face Hub for models or datasets. "
                "Returns a list of matching items with metadata."
            ),
            "parameters": _SEARCH_HF_PARAMETERS,
        },
    },
]


def execute_tool_call(tool_call):
    """Execute a single tool call from the model and return its result.

    The result is a JSON string: a list of per-hit summaries on success,
    or an error object when the requested function is not recognized.
    """
    name = tool_call.function.name
    kwargs = json.loads(tool_call.function.arguments)

    # Guard clause: anything other than the one known tool is an error.
    if name != "search_huggingface":
        return json.dumps({"error": "Unknown function"})

    hits = search_huggingface(**kwargs)

    # Condense each hit to just the fields the LLM needs; the optional
    # attributes are included only when present on the result object.
    summaries = []
    for hit in hits:
        summary = {"id": hit.id}
        if hasattr(hit, 'downloads'):
            summary['downloads'] = hit.downloads
        if hasattr(hit, 'likes'):
            summary['likes'] = hit.likes
        if hasattr(hit, 'tags'):
            # Limit tags so the tool output stays compact.
            summary['tags'] = hit.tags[:5] if hit.tags else []
        summaries.append(summary)

    return json.dumps(summaries, indent=2)


def chat_loop():
    """Interactive chat loop with the agent.

    Reads user input, sends the running conversation to the model, executes
    any requested tool calls, and keeps resolving tool rounds until the
    model produces a plain-text answer.
    """
    messages = [
        {
            "role": "system",
            "content": "You are a helpful assistant that can search the Hugging Face Hub for models and datasets. When users ask about models or datasets, use the search_huggingface function to find relevant results."
        }
    ]

    print("HuggingFace Explorer Agent")
    print("=" * 50)
    print("Ask me to search for models or datasets on HuggingFace!")
    print("Type 'exit' or 'quit' to end the conversation.\n")

    while True:
        # Get user input
        user_input = input("You: ").strip()

        if user_input.lower() in ['exit', 'quit', 'q']:
            print("Goodbye!")
            break

        if not user_input:
            continue

        # Add user message
        messages.append({"role": "user", "content": user_input})

        # Keep calling the model until it answers without requesting tools.
        # (A single fixed tool round would print `None` and drop any tool
        # calls the model chains in its follow-up response.)
        while True:
            response = client.chat.completions.create(
                model=MODEL,
                messages=messages,
                tools=tools,
                tool_choice="auto"
            )

            response_message = response.choices[0].message
            messages.append(response_message)

            if not response_message.tool_calls:
                # Plain-text answer: print it and return to the user prompt.
                print(f"\nAssistant: {response_message.content}\n")
                break

            # Execute every tool call and feed results back to the model.
            for tool_call in response_message.tool_calls:
                print(f"\n[Calling: {tool_call.function.name}]")
                result = execute_tool_call(tool_call)

                # Add tool result to messages
                messages.append({
                    "role": "tool",
                    "tool_call_id": tool_call.id,
                    "content": result
                })


def main():
    """Main entrypoint."""
    try:
        chat_loop()
    except KeyboardInterrupt:
        # Ctrl-C is a normal way to leave the chat; exit cleanly.
        print("\n\nGoodbye!")
        sys.exit(0)
    except Exception as err:
        # Any other failure is reported and mapped to a non-zero exit code.
        print(f"\nError: {err}")
        sys.exit(1)


if __name__ == "__main__":
    main()

55 changes: 55 additions & 0 deletions demos/hf_explorer/hf_tools.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import os
from huggingface_hub import HfApi

# Hugging Face access token read from the environment; may be None when
# HF_TOKEN is unset (anonymous Hub access — confirm rate limits apply).
hf_api_key = os.getenv("HF_TOKEN")

def search_huggingface(query: str, search_type: str = "model", limit: int = 10):
    """
    Search the Hugging Face Hub for models or datasets.

    Args:
        query: Search query string
        search_type: Either "model" or "dataset"
        limit: Maximum number of results to return

    Returns:
        List of search results with metadata

    Raises:
        ValueError: If search_type is not "model" or "dataset".
    """
    # Validate the argument up front so a bad search_type fails fast,
    # before any client construction or network work is done.
    if search_type not in ("model", "dataset"):
        raise ValueError("search_type must be 'model' or 'dataset'")

    api = HfApi(token=hf_api_key)

    if search_type == "model":
        results = api.list_models(search=query, limit=limit)
    else:
        results = api.list_datasets(search=query, limit=limit)

    # list_models/list_datasets return iterators; materialize for callers.
    return list(results)


def main():
    """Main entrypoint for the HF explorer CLI."""
    import sys

    # Positional arguments: <query> [search_type] [limit]
    args = sys.argv[1:]
    if not args:
        print("Usage: python hf_tools.py <query> [search_type] [limit]")
        print(" search_type: 'model' (default) or 'dataset'")
        print(" limit: number of results (default: 10)")
        sys.exit(1)

    query = args[0]
    search_type = args[1] if len(args) > 1 else "model"
    limit = int(args[2]) if len(args) > 2 else 10

    results = search_huggingface(query, search_type, limit)

    print(f"\nSearch results for '{query}' ({search_type}s):\n")
    for idx, item in enumerate(results, 1):
        print(f"{idx}. {item.id}")
        if hasattr(item, 'downloads'):
            print(f" Downloads: {item.downloads}")
        print()


if __name__ == "__main__":
    main()
56 changes: 56 additions & 0 deletions demos/smolagents/smolagent_python_interpreter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
import os
from smolagents import CodeAgent, LiteLLMModel
from smolagents.default_tools import PythonInterpreterTool


def main():
    """Main entrypoint for interactive smolagent."""
    # Route requests through OpenArc's OpenAI-compatible endpoint via LiteLLM.
    model = LiteLLMModel(
        model_id="openai/Muse-12B",
        api_key=os.getenv("OPENARC_API_KEY"),
        api_base="http://localhost:8000/v1"
    )

    # The agent is deliberately restricted to a single capability:
    # executing Python code via the interpreter tool.
    agent = CodeAgent(
        tools=[PythonInterpreterTool()],
        model=model,
        max_steps=10
    )

    print("🤖 Smolagent with Python Interpreter")
    print("=" * 60)
    print("I can execute Python code!")
    print("Type 'exit' or 'quit' to end the conversation.\n")

    while True:
        try:
            prompt = input("You: ").strip()

            if prompt.lower() in ['exit', 'quit', 'q']:
                print("Goodbye!")
                break

            if not prompt:
                continue

            print("\n🔄 Processing...\n")
            answer = agent.run(prompt)
            print(f"\n🤖 Assistant: {answer}\n")

        except KeyboardInterrupt:
            # Ctrl-C anywhere in the loop ends the session.
            print("\n\nGoodbye!")
            break
        except Exception as err:
            # Report the failure and keep the session alive.
            print(f"\n❌ Error: {err}\n")
            import traceback
            traceback.print_exc()

if __name__ == "__main__":
    main()
Loading