Python SDK for real-time video AI inference using the Overshoot Media Gateway API.
- Real-time video streaming via WebRTC
- AI inference on video frames with configurable prompts
- Action detection with built-in storage and querying
- Stream relay for external video sources (mobile apps, WebSocket feeds)
- Cross-platform camera support using OpenCV (Windows, macOS, Linux)
- Frame preprocessing with custom callbacks (OpenCV, MediaPipe, etc.)
- Multiple video sources: camera or video files
- Structured output with JSON schema support
- Async/await support with context managers
pip install aiohttp aiortc opencv-python numpy

Optional dependencies:
pip install mediapipe # For hand/pose/face detection preprocessing
pip install python-dotenv # For .env file support

| Component | Description |
|---|---|
RealtimeVision |
Stream from local camera/video to Overshoot |
OvershootStreamRelay |
Relay frames from external sources to Overshoot |
ActionDetector |
High-level action detection with storage |
ActionStore |
Thread-safe storage for detected actions |
OvershootHttpClient |
Low-level HTTP client |
import asyncio
from overshoot import RealtimeVision, RealtimeVisionConfig
async def main():
config = RealtimeVisionConfig(
api_url="https://cluster1.overshoot.ai/api/v0.2",
api_key="your-api-key",
prompt="Describe what you see in this video",
on_result=lambda r: print(f"Result: {r.result}"),
)
async with RealtimeVision(config) as vision:
await asyncio.sleep(60) # Stream for 60 seconds
asyncio.run(main())

Detect specific actions in video with automatic storage and querying.
import asyncio
from overshoot import ActionDetector
async def main():
detector = ActionDetector(
api_url="https://cluster1.overshoot.ai/api/v0.2",
api_key="your-api-key",
actions=["waving hand", "thumbs up", "pointing", "clapping"],
min_confidence=0.6,
on_action=lambda a: print(f"Detected: {a.action} ({a.confidence:.0%})"),
)
# Start real-time detection
await detector.start()
await asyncio.sleep(60)
await detector.stop()
# Query detected actions
waves = detector.get_actions(action="waving hand")
recent = detector.get_actions(last_seconds=10)
high_conf = detector.get_actions(min_confidence=0.8)
# Get summary
print(detector.summary()) # {"waving hand": 5, "thumbs up": 2}
# Export results
detector.export("results.json", format="json")
detector.export("results.csv", format="csv")
asyncio.run(main())

Analyze a pre-recorded video file instead of a live stream:

await detector.analyze_video("path/to/video.mp4")
for action in detector.actions:
print(f"[{action.timestamp:.1f}s] {action.action} ({action.confidence:.0%})")

Thread-safe storage for detected actions with querying and export.
from overshoot import ActionStore, DetectedAction
store = ActionStore()
# Add actions
store.add(DetectedAction("waving", timestamp=1.5, frame_number=45, confidence=0.92))
store.add(DetectedAction("thumbs up", timestamp=3.2, frame_number=96, confidence=0.85))
# Query with filters
all_waves = store.get_actions(action="waving")
time_range = store.get_actions(start_time=1.0, end_time=5.0)
high_conf = store.get_actions(min_confidence=0.9)
recent = store.get_actions(last_seconds=10)
# Combine filters
combined = store.get_actions(action="waving", min_confidence=0.8)
# Summary statistics
print(store.summary()) # {"waving": 1, "thumbs up": 1}
print(len(store)) # 2
# Export
store.export_json("actions.json")
store.export_csv("actions.csv")

@dataclass
class DetectedAction:
action: str # Action name (e.g., "waving hand")
timestamp: float # Seconds from stream start
frame_number: int # Frame index
confidence: float # 0.0 to 1.0
duration: Optional[float] = None
    metadata: dict = field(default_factory=dict)  # Latency info, custom data (dataclasses require default_factory for mutable defaults)

Relay video frames from external sources (mobile apps, WebSocket feeds) to Overshoot.
import asyncio
from overshoot import OvershootStreamRelay
async def main():
relay = OvershootStreamRelay(
api_url="https://cluster1.overshoot.ai/api/v0.2",
api_key="your-api-key",
prompt="Describe what you see",
on_result=lambda r: print(r["result"]),
)
await relay.start()
# Push frames from your source (RGB24 numpy array)
relay.push_frame(frame_data, timestamp=time.time())
await relay.stop()
asyncio.run(main())

Combine StreamRelay with ActionStore for action detection on external video sources:
import asyncio
import json
from overshoot import OvershootStreamRelay, ActionStore, DetectedAction
store = ActionStore()
def on_result(result):
"""Parse inference results and add to store."""
if result.get("type") != "inference":
return
data = json.loads(result["result"])
for action_data in data.get("detected_actions", []):
if action_data.get("detected") and action_data.get("confidence", 0) >= 0.6:
store.add(DetectedAction(
action=action_data["action"],
timestamp=result.get("timestamp", 0),
frame_number=0,
confidence=action_data["confidence"],
))
# Action detection schema
output_schema = {
"type": "object",
"properties": {
"detected_actions": {
"type": "array",
"items": {
"type": "object",
"properties": {
"action": {"type": "string", "enum": ["waving", "thumbs up"]},
"confidence": {"type": "number"},
"detected": {"type": "boolean"},
},
"required": ["action", "confidence", "detected"],
},
},
},
}
relay = OvershootStreamRelay(
api_url="https://cluster1.overshoot.ai/api/v0.2",
api_key="your-api-key",
prompt="Detect waving or thumbs up gestures",
on_result=on_result,
output_schema=output_schema,
)
async with relay:
# Push frames...
pass
# Query results
print(store.summary())
store.export_json("results.json")

| Parameter | Type | Default | Description |
|---|---|---|---|
api_url |
str |
Required | API endpoint URL |
api_key |
str |
Required | Your API key |
prompt |
str |
Required | AI prompt |
on_result |
Callable |
Required | Result callback |
on_error |
Callable |
None | Error callback |
model |
str |
"gemini-2.0-flash" |
Model name |
backend |
str |
"gemini" |
Backend name |
output_schema |
dict |
None | JSON schema for structured output |
width |
int |
640 | Frame width |
height |
int |
480 | Frame height |
fps |
int |
15 | Frames per second |
sampling_ratio |
float |
0.8 | Frame sampling ratio |
clip_length_seconds |
float |
0.5 | Clip duration |
delay_seconds |
float |
0.5 | Processing delay |
Live camera with action detection overlay:
python demo_action_detector.py

Features:
- Live camera preview with action overlay
- Detected actions displayed with confidence bars
- Press S for store summary, E to export, Q to quit
StreamRelay with ActionStore integration:
python demo_stream_relay_actions.py

Features:
- Push frames via StreamRelay
- Action detection with structured output
- Results stored in ActionStore
- Live overlay display
from overshoot import CameraSource
source = CameraSource() # Default camera
source = CameraSource(device_index=1) # Second camera
source = CameraSource(frame_processor=fn) # With preprocessing

from overshoot import VideoFileSource
source = VideoFileSource(file_path="/path/to/video.mp4")

Process frames before streaming using the frame_processor callback.
import cv2
from datetime import datetime
def add_timestamp(frame):
timestamp = datetime.now().strftime("%H:%M:%S")
cv2.putText(frame, timestamp, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
return frame
source = CameraSource(frame_processor=add_timestamp)

import cv2
import mediapipe as mp
mp_hands = mp.solutions.hands
mp_draw = mp.solutions.drawing_utils
hands = mp_hands.Hands()
def add_hand_landmarks(frame):
rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
results = hands.process(rgb)
if results.multi_hand_landmarks:
for hand in results.multi_hand_landmarks:
mp_draw.draw_landmarks(frame, hand, mp_hands.HAND_CONNECTIONS)
return frame
source = CameraSource(frame_processor=add_hand_landmarks)

| Parameter | Type | Required | Description |
|---|---|---|---|
api_url |
str |
Yes | API endpoint URL |
api_key |
str |
Yes | Your API key |
prompt |
str |
Yes | AI prompt for video analysis |
on_result |
Callable |
Yes | Callback for inference results |
source |
StreamSource |
No | Video source (camera or file) |
backend |
str |
No | "overshoot" or "gemini" |
model |
str |
No | Model name |
output_schema |
dict |
No | JSON schema for structured output |
on_error |
Callable |
No | Error callback |
processing |
dict |
No | Processing parameters |
debug |
bool |
No | Enable debug logging |
config = RealtimeVisionConfig(
# ... required params ...
processing={
"sampling_ratio": 0.1, # Frame sampling (0.0-1.0)
"fps": 30, # Frames per second (1-120)
"clip_length_seconds": 1.0, # Clip duration (0.1-60.0)
"delay_seconds": 1.0, # Processing delay (0.0-60.0)
},
)

Use JSON schema for structured responses:
config = RealtimeVisionConfig(
# ... required params ...
output_schema={
"type": "object",
"properties": {
"objects": {
"type": "array",
"items": {"type": "string"}
},
"count": {"type": "integer"}
}
},
)

from overshoot import (
ApiError,
ValidationError,
UnauthorizedError,
NotFoundError,
ServerError,
NetworkError,
)
try:
async with RealtimeVision(config) as vision:
await asyncio.sleep(60)
except UnauthorizedError:
print("Invalid API key")
except ValidationError as e:
print(f"Invalid configuration: {e.message}")
print(f"Details: {e.details}")
except NetworkError as e:
print(f"Connection failed: {e}")
except ApiError as e:
print(f"API error {e.status_code}: {e.message}")

| Method | Description |
|---|---|
start() |
Start the video stream |
stop() |
Stop the stream and release resources |
update_prompt(prompt) |
Update the AI prompt |
submit_feedback(rating, category, feedback) |
Submit feedback |
get_stream_id() |
Get current stream ID |
is_active() |
Check if stream is running |
| Method | Description |
|---|---|
start() |
Start real-time detection from camera |
stop() |
Stop detection |
analyze_video(path) |
Analyze a video file |
get_actions(**filters) |
Query detected actions |
summary() |
Get action counts by type |
export(path, format) |
Export to JSON or CSV |
clear() |
Clear stored actions |
| Method | Description |
|---|---|
add(action) |
Add a detected action |
add_many(actions) |
Add multiple actions |
get_actions(**filters) |
Query with filters |
summary() |
Get counts by action type |
export_json(path) |
Export to JSON |
export_csv(path) |
Export to CSV |
clear() |
Remove all actions |
| Method | Description |
|---|---|
start() |
Start the relay |
stop() |
Stop and release resources |
push_frame(frame, timestamp) |
Push a frame to stream |
update_prompt(prompt) |
Update the AI prompt |
overshoot/
├── __init__.py # Public API exports
├── constants.py # Default values and limits
├── exceptions.py # Error classes
├── types.py # Data classes
├── http_client.py # Low-level HTTP client
├── realtime_vision.py # RealtimeVision + OpenCVCameraTrack
├── stream_relay.py # OvershootStreamRelay
├── action_detector.py # ActionDetector
└── action_store.py # ActionStore
- Python 3.9+
- aiohttp
- aiortc
- opencv-python
- numpy
Optional:
- mediapipe (for ML preprocessing)
- python-dotenv (for .env support)
MIT