Skip to content

Commit f79a593

Browse files
Chandrapal Badshah, 0xbadshah, AdriiiPRodri
authored and committed
feat(lighthouse): filter out non-compatible OpenAI models (prowler-cloud#9523)
Co-authored-by: Chandrapal Badshah <[email protected]> Co-authored-by: Adrián Jesús Peña Rodríguez <[email protected]>
1 parent 63f10fe commit f79a593

2 files changed

Lines changed: 60 additions & 3 deletions

File tree

api/CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ All notable changes to the **Prowler API** are documented in this file.
1010

1111
### Changed
1212
- Endpoint `GET /overviews/attack-surfaces` no longer returns the related check IDs [(#9529)](https://github.com/prowler-cloud/prowler/pull/9529)
13+
- OpenAI provider to only load chat-compatible models with tool calling support [(#9523)](https://github.com/prowler-cloud/prowler/pull/9523)
1314

1415
---
1516

api/src/backend/tasks/jobs/lighthouse_providers.py

Lines changed: 59 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,41 @@
1111

1212
logger = get_task_logger(__name__)
1313

14+
# Prefixes of OpenAI model IDs that are left out of Lighthouse model selection.
# The families below do not offer text chat completions with tool calling.
# The tuple is meant to be handed directly to ``str.startswith``.
EXCLUDED_OPENAI_MODEL_PREFIXES = (
    # Image generation
    "dall-e",
    # Audio transcription
    "whisper",
    # Text-to-speech (tts-1, tts-1-hd, etc.)
    "tts-",
    # Text-to-video (sora-2, sora-2-pro, etc.)
    "sora",
    # Embeddings, including the alternative naming without "text-"
    "text-embedding",
    "embedding",
    # Content moderation
    "text-moderation",
    "omni-moderation",
    # Legacy completion models
    "text-davinci",
    "text-curie",
    "text-babbage",
    "text-ada",
    "davinci",
    "curie",
    "babbage",
    "ada",
    # Computer control agent
    "computer-use",
    # Image generation
    "gpt-image",
    # Audio models
    "gpt-audio",
    # Realtime voice API
    "gpt-realtime",
)
38+
39+
# Fragments that mark an OpenAI model ID as a non-chat variant. Unlike the
# prefix list, these may occur at any position inside the model ID.
EXCLUDED_OPENAI_MODEL_SUBSTRINGS = (
    # Audio preview models (gpt-4o-audio-preview, etc.)
    "-audio-",
    # Realtime preview models (gpt-4o-realtime-preview, etc.)
    "-realtime-",
    # Transcription models (gpt-4o-transcribe, etc.)
    "-transcribe",
    # TTS models (gpt-4o-mini-tts)
    "-tts",
    # Legacy instruct models (gpt-3.5-turbo-instruct, etc.)
    "-instruct",
)
48+
1449

1550
def _extract_error_message(e: Exception) -> str:
1651
"""
def _is_chat_capable_openai_model(model_id: str) -> bool:
    """
    Tell whether an OpenAI model ID passes the Lighthouse exclusion lists.

    Fine-tuned models are listed as ``ft:<base-model>:<org>:<suffix>:<id>``.
    The exclusion lists describe base model families, so for those IDs only
    the ``<base-model>`` segment is inspected. This keeps a fine-tune of an
    excluded family from slipping past the prefix check, and keeps a
    user-chosen suffix (e.g. ``my-audio-bot``) from triggering a substring
    rule by accident.

    Args:
        model_id: Model identifier as returned by the OpenAI models endpoint.

    Returns:
        True when the model matches neither an excluded prefix nor an
        excluded substring, False otherwise.
    """
    base_id = model_id
    if model_id.startswith("ft:"):
        segments = model_id.split(":", 2)
        # Fall back to the full ID when the base segment is missing or empty.
        if len(segments) > 1 and segments[1]:
            base_id = segments[1]

    if base_id.startswith(EXCLUDED_OPENAI_MODEL_PREFIXES):
        return False
    return not any(
        fragment in base_id for fragment in EXCLUDED_OPENAI_MODEL_SUBSTRINGS
    )


def _fetch_openai_models(api_key: str) -> Dict[str, str]:
    """
    Fetch available models from OpenAI API.

    Filters out models that don't support text input/output and tool calling,
    such as image generation (DALL-E), audio transcription (Whisper),
    text-to-speech (TTS), embeddings, and moderation models. Filtering is
    name-based: it relies on EXCLUDED_OPENAI_MODEL_PREFIXES and
    EXCLUDED_OPENAI_MODEL_SUBSTRINGS, not on capability metadata.

    Args:
        api_key: OpenAI API key for authentication.

    Returns:
        Dict mapping model_id to model_name. For OpenAI, both are the same
        as the API doesn't provide separate display names. Only includes
        models that support text input, text output and tool calling.

    Raises:
        Exception: If the API call fails.
    """
    client = openai.OpenAI(api_key=api_key)
    models = client.models.list()

    # A response object without a ``data`` attribute is treated as empty.
    return {
        model.id: model.id
        for model in getattr(models, "data", [])
        if _is_chat_capable_openai_model(model.id)
    }
300356

301357

302358
def _fetch_openai_compatible_models(base_url: str, api_key: str) -> Dict[str, str]:

0 commit comments

Comments
 (0)