feat(lighthouse): filter out non-compatible OpenAI models (prowler-cloud#9523)

Chandrapal Badshah · 0xbadshah · AdriiiPRodri · Hamza-abughazaleh · commit f79a5938dd73 · 2026-02-11T11:11:42.000+03:00
Co-authored-by: Chandrapal Badshah <12944530+Chan9390@users.noreply.github.com>
Co-authored-by: Adrián Jesús Peña Rodríguez <adrianjpr@gmail.com>
diff --git a/api/CHANGELOG.md b/api/CHANGELOG.md
@@ -10,6 +10,7 @@ All notable changes to the **Prowler API** are documented in this file.
 
 ### Changed
 - Endpoint `GET /overviews/attack-surfaces` no longer returns the related check IDs [(#9529)](https://github.com/prowler-cloud/prowler/pull/9529)
+- OpenAI provider to only load chat-compatible models with tool calling support [(#9523)](https://github.com/prowler-cloud/prowler/pull/9523)
 
 ---
 
diff --git a/api/src/backend/tasks/jobs/lighthouse_providers.py b/api/src/backend/tasks/jobs/lighthouse_providers.py
@@ -11,6 +11,41 @@
 
 logger = get_task_logger(__name__)
 
+# OpenAI model prefixes to exclude from Lighthouse model selection.
+# These models don't support text chat completions or tool calling.
+EXCLUDED_OPENAI_MODEL_PREFIXES = (
+    "dall-e",  # Image generation
+    "whisper",  # Audio transcription
+    "tts-",  # Text-to-speech (tts-1, tts-1-hd, etc.)
+    "sora",  # Text-to-video (sora-2, sora-2-pro, etc.)
+    "text-embedding",  # Embeddings
+    "embedding",  # Embeddings (alternative naming)
+    "text-moderation",  # Content moderation
+    "omni-moderation",  # Content moderation
+    "text-davinci",  # Legacy completion models
+    "text-curie",  # Legacy completion models
+    "text-babbage",  # Legacy completion models
+    "text-ada",  # Legacy completion models
+    "davinci",  # Legacy completion models
+    "curie",  # Legacy completion models
+    "babbage",  # Legacy completion models
+    "ada",  # Legacy completion models
+    "computer-use",  # Computer control agent
+    "gpt-image",  # Image generation
+    "gpt-audio",  # Audio models
+    "gpt-realtime",  # Realtime voice API
+)
+
+# OpenAI model substrings to exclude (patterns that can appear anywhere in model ID).
+# These patterns identify non-chat model variants.
+EXCLUDED_OPENAI_MODEL_SUBSTRINGS = (
+    "-audio-",  # Audio preview models (gpt-4o-audio-preview, etc.)
+    "-realtime-",  # Realtime preview models (gpt-4o-realtime-preview, etc.)
+    "-transcribe",  # Transcription models (gpt-4o-transcribe, etc.)
+    "-tts",  # TTS models (gpt-4o-mini-tts)
+    "-instruct",  # Legacy instruct models (gpt-3.5-turbo-instruct, etc.)
+)
+
 
 def _extract_error_message(e: Exception) -> str:
     """
@@ -283,20 +318,41 @@ def _fetch_openai_models(api_key: str) -> Dict[str, str]:
     """
     Fetch available models from OpenAI API.
 
+    Filters out models that don't support text input/output and tool calling,
+    such as image generation (DALL-E), audio transcription (Whisper),
+    text-to-speech (TTS), embeddings, and moderation models.
+
     Args:
         api_key: OpenAI API key for authentication.
 
     Returns:
         Dict mapping model_id to model_name. For OpenAI, both are the same
-        as the API doesn't provide separate display names.
+        as the API doesn't provide separate display names. Only includes
+        models that support text input, text output, and tool calling.
 
     Raises:
         Exception: If the API call fails.
     """
     client = openai.OpenAI(api_key=api_key)
     models = client.models.list()
-    # OpenAI uses model.id for both ID and display name
-    return {m.id: m.id for m in getattr(models, "data", [])}
+
+    # Filter models to only include those supporting chat completions + tool calling
+    filtered_models = {}
+    for model in getattr(models, "data", []):
+        model_id = model.id
+
+        # Skip if model ID starts with excluded prefixes
+        if model_id.startswith(EXCLUDED_OPENAI_MODEL_PREFIXES):
+            continue
+
+        # Skip if model ID contains excluded substrings
+        if any(substring in model_id for substring in EXCLUDED_OPENAI_MODEL_SUBSTRINGS):
+            continue
+
+        # Include model (supports chat completions + tool calling)
+        filtered_models[model_id] = model_id
+
+    return filtered_models
 
 
 def _fetch_openai_compatible_models(base_url: str, api_key: str) -> Dict[str, str]: