Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 25 additions & 5 deletions amadeusgpt/analysis_objects/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,10 @@
import numpy as np
import openai
from openai import OpenAI
from pydantic import ValidationError

from amadeusgpt.programs.sandbox import Sandbox
from amadeusgpt.system_prompts.visual_llm import VlmInferenceOutput
from amadeusgpt.utils import AmadeusLogger, QA_Message, create_qa_message
from amadeusgpt.utils.openai_adapter import OpenAIAdapter

Expand Down Expand Up @@ -267,13 +269,31 @@ def speak(self, sandbox: Sandbox, image: np.ndarray):
print("description of the image frame provided")
print(text)

thinking_pattern = r'<think>.*?</think>'
output_text = re.sub(thinking_pattern, '', text, flags=re.DOTALL)

print(f"output text after removing thinking: {output_text}")

pattern = r"```json(.*?)```"
if len(re.findall(pattern, text, re.DOTALL)) == 0:
raise ValueError("can't parse the json string correctly", text)
if len(re.findall(pattern, output_text, re.DOTALL)) == 0:
raise ValueError("can't parse the json string correctly", output_text)
else:
json_string = re.findall(pattern, text, re.DOTALL)[0]
json_obj = json.loads(json_string)
return json_obj
results = []
for response_json in re.findall(pattern, output_text, re.DOTALL):
try:
json_obj = json.loads(response_json)
VlmInferenceOutput.model_validate(json_obj)
results.append(json_obj)
except ValidationError as val_err:
print(f"Couldn't validate the json string correctly for {response_json}", val_err)
except Exception as e:
print(f"Couldn't parse the json string correctly for {response_json}", e)
raise e
if len(results) == 0:
raise ValueError("can't parse the json string correctly", output_text)
elif len(results) > 1:
print("WARNING!! Found multiple json strings. Returning only the first", results)
return results[0]


class CodeGenerationLLM(LLM):
Expand Down
9 changes: 9 additions & 0 deletions amadeusgpt/system_prompts/visual_llm.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,12 @@
from pydantic import BaseModel
from typing import List, Literal

class VlmInferenceOutput(BaseModel):
    """Schema for the JSON object a vision LLM is expected to emit.

    Parsed JSON blocks extracted from the model's response are validated
    against this model (via ``model_validate``) before being returned to
    the caller.
    """

    # Free-text description of what is visible in the image frame.
    description: str
    # Count reported by the VLM — presumably the number of animals/individuals
    # visible in the frame; confirm against the system prompt's JSON template.
    individuals: int
    # Coarse scene category; restricted to exactly these supported values.
    species: Literal["topview_mouse", "sideview_quadruped", "others"]
    # Names of notable background objects the VLM identified in the scene.
    background_objects: List[str]

def _get_system_prompt():
system_prompt = """
Describe what you see in the image and fill in the following json string:
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ dependencies = [
"matplotlib<3.9",
"openai>=1.0",
"opencv-python-headless>=4.11.0.86",
"pydantic>=2.11.7",
"pyyaml>=6.0.2",
"sentence-transformers>=5.1.0",
"streamlit>=1.26.0",
Expand Down
2 changes: 1 addition & 1 deletion tests/test_superanimal.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ def test_superanimal():
# the dummy video only contains 6 frames.
kwargs = {
'video_info.scene_frame_number': 1,
'llm_info.gpt_model': "gpt-4o"
'llm_info.gpt_model': "qwen/qwen2.5-vl-72b-instruct:free"
}
data_folder = "examples/DummyVideo"
result_folder = "temp_result_folder"
Expand Down